diff options
Diffstat (limited to 'lib/gcstar/GCPlugins')
121 files changed, 34677 insertions, 0 deletions
diff --git a/lib/gcstar/GCPlugins/GCPluginsBase.pm b/lib/gcstar/GCPlugins/GCPluginsBase.pm new file mode 100644 index 0000000..728e23a --- /dev/null +++ b/lib/gcstar/GCPlugins/GCPluginsBase.pm @@ -0,0 +1,396 @@
package GCPlugins::GCPluginsBase;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

{
    # Base class of every web plugin: fetches pages through the shared
    # LWP::Simple user agent and feeds them to the (HTML::Parser) parse
    # method, collecting search results in $self->{itemsList} and the
    # details of one item in $self->{curInfo}.
    package GCPluginParser;
    use base qw(HTML::Parser);
    use LWP::Simple qw($ua);
    use HTTP::Cookies::Netscape;
    use URI::Escape;
    use HTML::Entities;
    use Encode;
    use File::Spec;


    # Creates a parser and configures the shared user agent (browser-like
    # agent string and default Accept headers).
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();

        $ua->agent('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.5) Gecko/20041111 Firefox/1.0');
        $ua->default_header('Accept-Encoding' => 'x-gzip');
        $ua->default_header('Accept' => 'text/html');
        $self->{ua} = $ua;

        $self->{itemIdx} = -1;
        # An array reference, not "()": assigning an empty list to a scalar
        # slot stores undef, which getItems could not dereference.
        $self->{itemsList} = [];

        bless ($self, $class);
        return $self;
    }

    # Number of results found by the last search (itemIdx is the index of
    # the last stored item, -1 when there is none).
    sub getItemsNumber
    {
        my ($self) = @_;

        return $self->{itemIdx} + 1;
    }

    # List of result hashes found by the last search; empty when nothing
    # was found (also when a subclass left itemsList undefined).
    sub getItems
    {
        my ($self) = @_;
        return @{$self->{itemsList} || []};
    }

    # Runs one search pass: fetches the search page (or nextUrl for a later
    # pass of a multi-pass plugin), parses it in "list" mode and converts
    # the character set of every collected field except 'url' and those
    # named by getNotConverted.
    sub load
    {
        my $self = shift;

        $self->checkProxy;
        $self->checkCookieJar;

        $self->{itemIdx} = -1;
        $self->{isInfo} = 0;
        $self->{itemsList} = [];

        #my $word = uri_escape_utf8($self->{title});
        my $title2 = encode($self->getSearchCharset, $self->{title});
        my $word = uri_escape($title2);
        $word =~ s/%20/+/g;

        my $html;

        # For multi-pass plugins, the plugin will have set the url to load for
        # the next pass as nextUrl. If this doesn't exist, we're either on the
        # first pass, or only using a one-pass plugin, so call getSearchUrl
        # to find the url to retrieve
        if ($self->{nextUrl})
        {
            $html = $self->loadPage($self->{nextUrl});
        }
        else
        {
            $html = $self->loadPage($self->getSearchUrl($word));
        }

        $self->{parsingList} = 1;
        $html = $self->preProcess($html);
        decode_entities($html)
            if $self->decodeEntitiesWanted;
        $self->{inside} = undef;
        $self->parse($html);

        my @noConversion = @{$self->getNotConverted};
        foreach my $item (@{$self->{itemsList}})
        {
            foreach my $field (keys %{$item})
            {
                next if $field eq 'url';
                $item->{$field} = $self->convertCharset($item->{$field})
                    if ! GCUtils::inArrayTest($field, @noConversion);
            }
        }

    }

    # Fetches $url and returns its (decoded when possible) content.
    #   $post   - optional form data; when true the page is requested
    #             with POST instead of GET
    #   $noSave - when true, $self->{loadedUrl} is left untouched
    # The GCS_DEBUG_PLUGIN_PHASE environment variable enables a debug
    # cache in the temporary directory: 1 saves every fetched page to a
    # file, 2 reads the page back from that file instead of the network.
    sub loadPage
    {
        my ($self, $url, $post, $noSave) = @_;
        my $debugPhase = $ENV{GCS_DEBUG_PLUGIN_PHASE} || 0;
        my $debugFile;
        $debugFile = File::Spec->tmpdir.'/'.GCUtils::getSafeFileName($url)
            if ($debugPhase > 0);
        $self->{loadedUrl} = $url if ! $noSave;
        my $response;
        my $result;
        if ($debugPhase != 2)
        {
            if ($post)
            {
                $response = $ua->post($url, $post);
            }
            else
            {
                $response = $ua->get($url);
            }

            #UnclePetros 03/07/2011:
            #code to handle correctly 302 response messages
            if ($response->code == 302)
            {
                my $location = $response->header("location");
                # A 302 without a Location header cannot be followed; keep
                # the original response in that case.
                if (defined($location) && $location ne '')
                {
                    $response = $ua->get($location);
                    $self->{loadedUrl} = $location if ! $noSave;
                }
            }

            eval {
                $result = $response->decoded_content;
            };
            if ($debugPhase == 1)
            {
                # Saving the page is best effort: a failure is reported but
                # must not prevent the page from being returned.
                if (open(my $debugOut, '>', $debugFile))
                {
                    print {$debugOut} ($result || $response->content);
                    close($debugOut)
                        or warn "Cannot close debug file $debugFile: $!\n";
                }
                else
                {
                    warn "Cannot write debug file $debugFile: $!\n";
                }
            }
        }
        else
        {
            if (open(my $debugIn, '<', $debugFile))
            {
                local $/;
                $result = <$debugIn>;
                close($debugIn);
                utf8::decode($result);
            }
            else
            {
                warn "Cannot read debug file $debugFile: $!\n";
            }
        }
        return $result || ($response && $response->content);
    }

    # Returns $msg lower-cased, with the first letter upper-cased for every
    # word of at least two characters that starts the string or follows
    # whitespace or a comma.
    sub capWord
    {
        my ($self, $msg) = @_;

        use locale;

        (my $newmsg = lc $msg) =~ s/(\s|,|^)(\w)(\w)(\w*?)/$1\U$2\E$3$4/gi;
        return $newmsg;
    }

    # Reference to the list of field names a search can be based on.
    sub getSearchFieldsArray
    {
        return [''];
    }

    # Comma separated, displayable labels of the search fields, as given by
    # $model->getDisplayedLabel.
    sub getSearchFields
    {
        my ($self, $model) = @_;

        return join(', ',
                    map { $model->getDisplayedLabel($_) }
                        @{$self->getSearchFieldsArray});
    }

    # Value stored for $field in the plugin's hasField table.
    sub hasField
    {
        my ($self, $field) = @_;

        return $self->{hasField}->{$field};
    }

    sub getExtra
    {
        return '';
    }

    # Character set for web page text
    sub getCharset
    {
        my $self = shift;

        return "ISO-8859-1";
    }

    # Character set for encoding search term, can sometimes be different
    # to the page encoding, but we default to the same as the page set
    sub getSearchCharset
    {
        my $self = shift;

        # Called as a method so that a plugin overriding getCharset also
        # gets its own charset here.
        return $self->getCharset;
    }

    # For some plugins, we need extra checks to determine if urls match
    # the language the plugin is written for. This allows us to correctly determine
    # if a drag and dropped url is handled by a particular plugin. If these
    # checks are necessary, return 1, and make sure plugin handles
    # the testURL function correctly
    sub needsLanguageTest
    {
        return 0;
    }

    # Used to test if a given url is handled by the plugin. Only required if
    # needsLanguageTest is true.
    sub testURL
    {
        my ($self, $url) = @_;
        return 1
    }

    # Determines whether plugin should be the default plugins gcstar uses.
    # Plugins with this attribute set will appear first in plugin list,
    # and will be highlighted with a star icon. A returned value of 1
    # means the plugin is preferred if its language matches the user's language,
    # a returned value of 2 means it's preferred regardless of the language.
    sub isPreferred
    {
        return 0;
    }

    # Same value as isPreferred. Dispatched through the object (or class)
    # when there is one so that plugin overrides of isPreferred are seen;
    # a plain function call keeps returning the default.
    sub getPreferred
    {
        my $self = shift;
        return $self ? $self->isPreferred : isPreferred();
    }

    # Reference to the list of field names whose values must not go through
    # convertCharset.
    sub getNotConverted
    {
        my $self = shift;
        return [];
    }

    # True when HTML entities must be decoded before parsing a page.
    sub decodeEntitiesWanted
    {
        return 1;
    }

    sub getDefaultPictureSuffix
    {
        return '';
    }

    # Decodes $value from the plugin charset. A plain value is decoded and
    # returned (unchanged when decoding fails); for a reference to an array
    # of arrays every cell is decoded in place and the same reference is
    # returned.
    sub convertCharset
    {
        my ($self, $value) = @_;

        my $result = $value;
        if (ref($value) eq 'ARRAY')
        {
            foreach my $line (@{$value})
            {
                $_ = decode($self->getCharset, $_) foreach @{$line};
            }
        }
        else
        {
            eval {
                $result = decode($self->getCharset, $result);
            };
        }
        return $result;
    }

    # Loads and returns the details of the result selected through
    # $self->{wantedIdx}.
    sub getItemInfo
    {
        my $self = shift;

        # init is optional: plugins that do not define it are fine.
        eval {
            $self->init;
        };
        my $idx = $self->{wantedIdx};
        my $url = $self->getItemUrl($self->{itemsList}[$idx]->{url});
        $self->loadUrl($url);
        return $self->{curInfo};
    }

    # Hook letting a plugin rewrite the url of an item before it is
    # fetched; the default keeps it as is.
    sub changeUrl
    {
        my ($self, $url) = @_;

        return $url;
    }

    # Fetches the page of one item, parses it in "item" mode and returns
    # the resulting curInfo hash. Scalar values are cleaned up ('|' turned
    # into ',', carriage returns and trailing blanks removed) and the url
    # field gets the plugin name appended.
    sub loadUrl
    {
        my ($self, $url) = @_;
        $self->checkProxy;
        $self->checkCookieJar;
        my $realUrl = $self->changeUrl($url);
        my $html = $self->loadPage($realUrl);
        $self->{parsingList} = 0;
        #$html = $self->convertCharset($html);
        $self->{curInfo} = {};

        $html = $self->preProcess($html);
        decode_entities($html)
            if $self->decodeEntitiesWanted;

        $self->{curInfo}->{$self->{urlField}} = $url;
        $self->{inside} = undef;
        $self->parse($html);

        my @noConversion = @{$self->getNotConverted};
        foreach my $field (keys %{$self->{curInfo}})
        {
            next if $field eq $self->{urlField};
            $self->{curInfo}->{$field} = $self->convertCharset($self->{curInfo}->{$field})
                if ! GCUtils::inArrayTest($field, @noConversion);
            if (ref($self->{curInfo}->{$field}) ne 'ARRAY')
            {
                $self->{curInfo}->{$field} =~ s/\|/,/gm;
                $self->{curInfo}->{$field} =~ s/\r//gm;
                $self->{curInfo}->{$field} =~ s/[ \t]*$//gm;
            }
        }
        $self->{curInfo}->{$self->{urlField}} .= $GCModel::linkNameSeparator.$self->getName;
        return $self->{curInfo};
    }

    # Remembers the HTTP proxy to use; applied by checkProxy.
    sub setProxy
    {
        my ($self, $proxy) = @_;

        $self->{proxy} = $proxy;
    }

    # Applies the remembered proxy to the shared user agent.
    sub checkProxy
    {
        my $self = shift;
        $ua->proxy(['http'], $self->{proxy});
        #$self->{ua}->proxy(['http'], $self->{proxy});
    }

    # Remembers the cookie file to use; applied by checkCookieJar.
    sub setCookieJar
    {
        my ($self, $cookieJar) = @_;
        $self->{cookieJar} = $cookieJar;
    }

    # Installs a Netscape-format, auto-saved cookie jar backed by the
    # remembered cookie file on the shared user agent.
    sub checkCookieJar
    {
        my $self = shift;
        $ua->cookie_jar(HTTP::Cookies::Netscape->new(
            'file' => "$self->{cookieJar}",
            'autosave' => 1,));
    }

    # Used to set the number of passes the plugin requires
    sub getNumberPasses
    {
        # Most plugins only need to search once, so default to one pass
        return 1;
    }

    # Returns undef if it doesn't support search using barcode scanner
    sub getEanField
    {
        return undef;
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCTVepisodes/GCTVepisodesCommon.pm b/lib/gcstar/GCPlugins/GCTVepisodes/GCTVepisodesCommon.pm new file mode 100644 index 0000000..478f1b8 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCTVepisodes/GCTVepisodesCommon.pm @@ -0,0 +1,67 @@
package GCPlugins::GCTVepisodes::GCTVepisodesCommon;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

our $MAX_ACTORS = 10;
our $MAX_DIRECTORS = 4;

use GCPlugins::GCPluginsBase;

{
    # Common parent of the TV episodes plugins.
    package GCPlugins::GCTVepisodes::GCTVepisodesPluginsBase;

    use base qw(GCPluginParser);

    # Builds the parser through the parent constructor and makes sure it is
    # blessed into the requested class.
    sub new
    {
        my $invocant = shift;
        my $class = ref($invocant) || $invocant;

        return bless($class->SUPER::new(), $class);
    }

    # Episodes are searched by series name.
    sub getSearchFieldsArray
    {
        return ['series'];
    }

    # Loads an item like the parent class does, then promotes the original
    # title to title when the page gave no title of its own.
    sub loadUrl
    {
        my ($self, $url) = @_;

        $self->SUPER::loadUrl($url);

        # Nothing to fix when a title exists or no original title is known
        return $self->{curInfo}
            if $self->{curInfo}->{title} || !$self->{curInfo}->{original};

        $self->{curInfo}->{title} = $self->{curInfo}->{original};
        $self->{curInfo}->{original} = '';
        return $self->{curInfo};
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdb.pm b/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdb.pm new file mode 100644 index 0000000..f275ca5 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdb.pm @@ -0,0 +1,360 @@
package GCPlugins::GCTVepisodes::GCTvdb;

###################################################
#
# Copyright 2005-2007 Tian
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCTVepisodes::GCTVepisodesCommon;


{
    # Two-pass plugin for thetvdb.com: pass 1 lists the series matching
    # the search, pass 2 lists the episodes of the chosen series. Episode
    # urls dropped on the application are resolved through the XML api.
    package GCPlugins::GCTVepisodes::GCPluginTvdb;

    use base qw(GCPlugins::GCTVepisodes::GCTVepisodesPluginsBase);
    use XML::Simple;
    use Encode;
    use LWP::Simple qw($ua);

    # Root of the keyed part of the thetvdb.com XML api
    my $API_ROOT = 'http://www.thetvdb.com/api/A8CC4AF70D0385F3';

    # Dispatches on the current pass and mode (see the private helpers
    # below). Pages starting with an HTML doctype are not api answers and
    # are ignored.
    sub parse
    {
        my ($self, $page) = @_;
        return if defined($page) && $page =~ /^<!DOCTYPE html/;

        my $pass = $self->{pass} || 0;
        if ($pass == 1)
        {
            # Searching on series name
            $self->_parseSeriesList($page);
        }
        elsif ($self->{parsingList})
        {
            # Searching on episodes
            $self->_parseEpisodeList($page);
        }
        elsif ($pass != 2)
        {
            # Process a given url
            $self->_parseEpisodePage($page);
        }
        else
        {
            # Everything was already collected with the episodes list
            $self->_copySelectedItem;
        }
    }

    # Parses $page with $listElement always folded into an array. Returns
    # nothing when there is no content to parse (XMLin would otherwise
    # treat the missing argument as a request to find a file).
    sub _xmlIn
    {
        my ($page, $listElement) = @_;
        return if !defined($page) || $page !~ /\S/;

        my $xml = XML::Simple->new->XMLin(
            $page,
            ForceArray => [$listElement],
            KeyAttr => []
        );
        return ref($xml) eq 'HASH' ? $xml : undef;
    }

    # Stores $value under $key unless it is a reference: XML::Simple
    # represents an empty element as an empty hash.
    sub _setText
    {
        my ($target, $key, $value) = @_;
        $target->{$key} = $value if !ref($value);
    }

    # Appends every name of a '|' separated (and possibly '|' delimited)
    # list to the $key list of $target, one single-cell row per name.
    sub _pushPipeList
    {
        my ($target, $key, $list) = @_;
        return if ref($list) || !defined($list);

        $list =~ s/^\|//;
        $list =~ s/\|$//;
        push @{$target->{$key}}, [$_] foreach split(/\|/, $list);
    }

    # Address of the web page describing $episode for this plugin language
    sub _episodeWebPage
    {
        my ($self, $episode) = @_;
        return "http://www.thetvdb.com/?tab=episode&seriesid=".$episode->{seriesid}."&seasonid=".$episode->{seasonid}."&id=".$episode->{id}."&lid=".$self->siteLanguageCode();
    }

    # Pass 1: one result per series, with the url of its episodes list as
    # nextUrl.
    sub _parseSeriesList
    {
        my ($self, $page) = @_;
        my $xml = _xmlIn($page, 'Series') or return;

        foreach my $series (@{$xml->{Series} || []})
        {
            $self->{itemIdx}++;
            $self->{itemsList}[$self->{itemIdx}] = {
                nextUrl => "$API_ROOT/series/".$series->{id}."/all/".$self->siteLanguage().".xml",
                name => $series->{SeriesName},
                firstaired => $series->{FirstAired},
            };
        }
    }

    # Pass 2: one result per episode, already carrying everything needed to
    # fill the item (series level data is repeated in every result).
    sub _parseEpisodeList
    {
        my ($self, $page) = @_;
        my $xml = _xmlIn($page, 'Episode') or return;
        my $series = ref($xml->{Series}) eq 'HASH' ? $xml->{Series} : {};

        foreach my $episode (@{$xml->{Episode} || []})
        {
            my %item;
            _setText(\%item, 'name', $episode->{EpisodeName});
            $item{season} = $episode->{SeasonNumber};
            $item{episode} = $episode->{EpisodeNumber};
            _setText(\%item, 'director', $episode->{Director});
            _setText(\%item, 'writer', $episode->{Writer});
            _setText(\%item, 'overview', $episode->{Overview});
            $item{image} = "http://thetvdb.com/banners/".$episode->{filename}
                if (!ref($episode->{filename}));
            _setText(\%item, 'series', $series->{SeriesName});
            _setText(\%item, 'firstaired', $series->{FirstAired});
            _setText(\%item, 'actors', $series->{Actors});
            _setText(\%item, 'genre', $series->{Genre});
            _setText(\%item, 'runtime', $series->{Runtime});
            $item{url} = $self->_episodeWebPage($episode);

            $self->{itemIdx}++;
            $self->{itemsList}[$self->{itemIdx}] = \%item;
        }
    }

    # Fills curInfo from the api page of a single episode. The series level
    # data comes from a second request; when that request fails the episode
    # data is still returned.
    sub _parseEpisodePage
    {
        my ($self, $page) = @_;
        my $xml = _xmlIn($page, 'Episode') or return;
        my $episode = $xml->{Episode}[0] || {};

        # Need to grab the generic series info too
        my $series = {};
        my $response = $ua->get("$API_ROOT/series/".$episode->{seriesid}.'/'.$self->siteLanguage().'.xml');
        if ($response->is_success)
        {
            my $result;
            eval {
                $result = $response->decoded_content;
            };
            my $seriesxml = _xmlIn($result, 'Series');
            $series = $seriesxml->{Series}[0]
                if $seriesxml && ref($seriesxml->{Series}) eq 'ARRAY' && $seriesxml->{Series}[0];
        }

        my $info = $self->{curInfo} ||= {};
        _setText($info, 'season', $episode->{SeasonNumber});
        _setText($info, 'episode', $episode->{EpisodeNumber});
        _setText($info, 'name', $episode->{EpisodeName});
        _setText($info, 'series', $series->{SeriesName});
        _setText($info, 'director', $episode->{Director});
        $info->{director} =~ s/^\|//;
        $info->{director} =~ s/\|$//;
        _setText($info, 'firstaired', $episode->{FirstAired});
        _setText($info, 'writer', $episode->{Writer});
        $info->{writer} =~ s/^\|//;
        $info->{writer} =~ s/\|$//;

        _pushPipeList($info, 'actors', $series->{Actors});
        _pushPipeList($info, 'genre', $series->{Genre});

        _setText($info, 'time', $series->{Runtime});
        $info->{image} = "http://thetvdb.com/banners/".$episode->{filename}
            if (!ref($episode->{filename}));
        _setText($info, 'synopsis', $episode->{Overview});
        $info->{webPage} = $self->_episodeWebPage($episode);
    }

    # Fills curInfo from the search result selected through wantedIdx.
    sub _copySelectedItem
    {
        my ($self) = @_;
        my $item = $self->{itemsList}[$self->{wantedIdx}];
        my $info = $self->{curInfo} ||= {};

        $info->{season} = $item->{season};
        $info->{episode} = $item->{episode};
        $info->{name} = $item->{name};
        $info->{series} = $item->{series};
        $info->{director} = $item->{director};
        $info->{director} =~ s/^\|//;
        $info->{director} =~ s/\|$//;
        $info->{firstaired} = $item->{firstaired};
        $info->{writer} = $item->{writer};
        $info->{writer} =~ s/^\|//;
        $info->{writer} =~ s/\|$//;

        _pushPipeList($info, 'actors', $item->{actors});
        _pushPipeList($info, 'genre', $item->{genre});

        $info->{time} = $item->{runtime};
        $info->{image} = $item->{image};
        $info->{synopsis} = $item->{overview};
        $info->{webPage} = $item->{url};
    }

    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = $class->SUPER::new();
        bless ($self, $class);

        $self->{curName} = undef;
        $self->{curUrl} = undef;

        return $self;
    }

    # The api answers are parsed as they are.
    sub preProcess
    {
        my ($self, $html) = @_;

        return $html;
    }

    sub getSearchUrl
    {
        my ($self, $word) = @_;
        return "http://www.thetvdb.com/api/GetSeries.php?seriesname=$word&language=".$self->siteLanguage();
    }

    # Returns the api url matching $url, or the site address when no url
    # is given.
    sub getItemUrl
    {
        my ($self, $url) = @_;
        if (!$url)
        {
            # If we're not passed a url, return a hint so that gcstar knows what type
            # of addresses this plugin handles
            $url = "http://www.thetvdb.com";
        }
        elsif (index($url, "api") < 0)
        {
            # Url isn't for the tvdb api, so we need to find the episode id
            # and return a url corresponding to the api page for this episode

            # List assignment: the id stays undefined when the url has
            # none, instead of picking up a capture from an earlier match.
            my ($id) = $url =~ /[?&]id=([0-9]+)/;
            $id = '' if !defined($id);
            $url = "$API_ROOT/episodes/$id/".$self->siteLanguage().".xml";
        }
        return $url;
    }

    sub changeUrl
    {
        my ($self, $url) = @_;
        # Make sure the url is for the api, not the main episode page
        return $self->getItemUrl($url);
    }

    sub getNumberPasses
    {
        return 2;
    }

    sub getName
    {
        return "Tvdb";
    }

    sub needsLanguageTest
    {
        return 1;
    }

    # True when the lid (language id) parameter of $url is the language
    # code of this plugin; urls without a lid are not handled.
    sub testURL
    {
        my ($self, $url) = @_;
        my ($id) = $url =~ /[?&]lid=([0-9]+)/;
        return '' if !defined($id);
        return ($id == $self->siteLanguageCode());
    }

    # Declares in hasField the columns shown for the results of the
    # current pass.
    sub getReturnedFields
    {
        my $self = shift;

        if ($self->{pass} == 1)
        {
            $self->{hasField} = {
                name => 1,
                firstaired => 1,
            };
        }
        else
        {
            $self->{hasField} = {
                name => 1,
                season => 1,
                episode => 1,
            };
        }
    }

    sub getAuthor
    {
        return 'Zombiepig';
    }

    sub getLang
    {
        return 'EN';
    }

    sub isPreferred
    {
        return 1;
    }

    sub getSearchCharset
    {
        my $self = shift;

        # Need urls to be double character encoded
        return "utf8";
    }

    sub getCharset
    {
        my $self = shift;

        return "UTF-8";
    }

    sub decodeEntitiesWanted
    {
        return 0;
    }

    # Language abbreviation used in the api urls
    sub siteLanguage
    {
        my $self = shift;

        return 'en';
    }

    # Values are used as they come out of the XML parser.
    sub convertCharset
    {
        my ($self, $value) = @_;
        return $value;
    }


    # Numeric language id used in the lid parameter of the site urls
    sub siteLanguageCode
    {
        my $self = shift;

        return 7;
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdbES.pm b/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdbES.pm new file mode 100644 index 0000000..dc753f1 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdbES.pm @@ -0,0 +1,61 @@
package GCPlugins::GCTVepisodes::GCTvdbES;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCTVepisodes::GCTvdb;


{
    # Spanish flavour of the Tvdb plugin: only the language settings differ.
    package GCPlugins::GCTVepisodes::GCPluginTvdbES;

    use base qw(GCPlugins::GCTVepisodes::GCPluginTvdb);

    # Language of the plugin
    sub getLang
    {
        return 'ES';
    }

    # Displayed name of the plugin
    sub getName
    {
        return 'Tvdb ES';
    }

    # Language abbreviation used by the site
    sub siteLanguage
    {
        return 'es';
    }

    # Numeric language id used by the site
    sub siteLanguageCode
    {
        return 16;
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdbFR.pm b/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdbFR.pm new file mode 100644 index 0000000..791cee7 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdbFR.pm @@ -0,0 +1,61 @@
package GCPlugins::GCTVepisodes::GCTvdbFR;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCTVepisodes::GCTvdb;


{
    # French flavour of the Tvdb plugin: only the language settings differ.
    package GCPlugins::GCTVepisodes::GCPluginTvdbFR;

    use base qw(GCPlugins::GCTVepisodes::GCPluginTvdb);

    # Language of the plugin
    sub getLang
    {
        return 'FR';
    }

    # Displayed name of the plugin
    sub getName
    {
        return 'Tvdb FR';
    }

    # Language abbreviation used by the site
    sub siteLanguage
    {
        return 'fr';
    }

    # Numeric language id used by the site
    sub siteLanguageCode
    {
        return 17;
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdbIT.pm b/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdbIT.pm new file mode 100644 index 0000000..c3eab7b --- /dev/null +++ b/lib/gcstar/GCPlugins/GCTVepisodes/GCTvdbIT.pm @@ -0,0 +1,60 @@
package GCPlugins::GCTVepisodes::GCTvdbIT;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCTVepisodes::GCTvdb;


{
    # Italian flavour of the Tvdb plugin: only the language settings differ.
    package GCPlugins::GCTVepisodes::GCPluginTvdbIT;

    use base qw(GCPlugins::GCTVepisodes::GCPluginTvdb);

    # Language of the plugin
    sub getLang
    {
        return 'IT';
    }

    # Displayed name of the plugin
    sub getName
    {
        return 'Tvdb IT';
    }

    # Language abbreviation used by the site
    sub siteLanguage
    {
        return 'it';
    }

    # Numeric language id used by the site
    sub siteLanguageCode
    {
        return 15;
    }
}

1;
diff --git a/lib/gcstar/GCPlugins/GCTVseries/GCTVseriesCommon.pm b/lib/gcstar/GCPlugins/GCTVseries/GCTVseriesCommon.pm new file mode 100644 index 0000000..119b1df --- /dev/null +++ b/lib/gcstar/GCPlugins/GCTVseries/GCTVseriesCommon.pm @@ -0,0 +1,53 @@
package GCPlugins::GCTVseries::GCTVseriesCommon;

###################################################
#
# Copyright 2005-2009 Tian
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

our $MAX_ACTORS = 10;
our $MAX_DIRECTORS = 4;

use GCPlugins::GCPluginsBase;

{
    # Common parent of the TV series plugins.
    package GCPlugins::GCTVseries::GCTVseriesPluginsBase;

    use base qw(GCPluginParser);

    # Builds the parser through the parent constructor and makes sure it is
    # blessed into the requested class.
    sub new
    {
        my $invocant = shift;
        my $class = ref($invocant) || $invocant;

        return bless($class->SUPER::new(), $class);
    }

    # Series are searched by series name.
    sub getSearchFieldsArray
    {
        return ['series'];
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCTVseries/GCThemoviedb.pm b/lib/gcstar/GCPlugins/GCTVseries/GCThemoviedb.pm new file mode 100644 index 0000000..e4d4eb2 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCTVseries/GCThemoviedb.pm @@ -0,0 +1,340 @@
package GCPlugins::GCTVseries::GCthemoviedb;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCTVseries::GCTVseriesCommon;

{

    # Plugin adding movies from themoviedb.org to a TV series collection:
    # every movie is stored as a special episode (season 0).
    package GCPlugins::GCTVseries::GCPluginThemoviedb;

    use base 'GCPlugins::GCTVseries::GCTVseriesPluginsBase';
    use XML::Simple;

    # Api addresses, completed with the movie id or the searched words
    my $INFO_URL = 'http://api.themoviedb.org/2.1/Movie.getInfo/en/xml/9fc8c3894a459cac8c75e3284b712dfc/';
    my $SEARCH_URL = 'http://api.themoviedb.org/2.1/Movie.search/en/xml/9fc8c3894a459cac8c75e3284b712dfc/';

    # Parses an api answer, either as search results or as the description
    # of one movie. Empty pages and pages starting with an HTML doctype
    # are not api answers and are ignored.
    sub parse
    {
        my ($self, $page) = @_;
        return if !defined($page) || $page !~ /\S/;
        return if $page =~ /^<!DOCTYPE html/;

        if ($self->{parsingList})
        {
            $self->_parseSearchResults($page);
        }
        else
        {
            $self->_parseMovieInfo($page);
        }
    }

    # Elements called $name inside $container as a list, whether XML::Simple
    # folded them into an array (several elements, or forced) or left a
    # single one as a hash.
    sub _children
    {
        my ($container, $name) = @_;
        return () if ref($container) ne 'HASH';

        my $children = $container->{$name};
        return @{$children} if ref($children) eq 'ARRAY';
        return ($children) if ref($children) eq 'HASH';
        return ();
    }

    # Adds one result per movie found, plus one per alternative name of
    # each movie.
    sub _parseSearchResults
    {
        my ($self, $page) = @_;
        return if $page =~ m/>Nothing found\.<\/movie/;

        my $xml = XML::Simple->new->XMLin(
            $page,
            ForceArray => [ 'movie', 'alternative_name' ],
            KeyAttr => ['id']
        );
        return if ref($xml) ne 'HASH' || ref($xml->{'movies'}) ne 'HASH';

        my $movies = $xml->{'movies'}->{'movie'} || {};
        foreach my $id (keys(%{$movies}))
        {
            my $movie = $movies->{$id};
            # We only want movies, not series and everything else the api returns
            next if $movie->{'type'} ne "movie";

            my $url = $INFO_URL . $id;
            # If the release date is missing, it will be returned as a reference, so only
            # save the release if it's not one
            my $released = ref($movie->{'released'}) ? "" : $movie->{'released'};

            # The movie itself, then any alternative names as additional
            # search results.
            my @titles = ($movie->{'name'});
            push @titles, grep { !ref($_) } @{$movie->{alternative_name} || []};

            foreach my $title (@titles)
            {
                $self->{itemIdx}++;
                $self->{itemsList}[ $self->{itemIdx} ] = {
                    firstaired => $released,
                    url => $url,
                    title => $title,
                };
            }
        }
    }

    # Fills curInfo from the api description of one movie.
    sub _parseMovieInfo
    {
        my ($self, $page) = @_;

        my $xml = XML::Simple->new->XMLin(
            $page,
            ForceArray => [ 'country', 'person', 'category', 'size', 'alternative_name' ],
            KeyAttr => ['']
        );
        return if ref($xml) ne 'HASH' || ref($xml->{movies}) ne 'HASH';
        my $movie = $xml->{movies}->{movie};
        return if ref($movie) ne 'HASH';

        my $info = $self->{curInfo} ||= {};

        # Title selected in the results list, if any. Read without creating
        # an entry in the list when the movie was loaded from a url.
        my $wanted;
        $wanted = $self->{itemsList}[ $self->{wantedIdx} ]
            if $self->{itemsList} && defined($self->{wantedIdx});
        my $chosen = $wanted ? $wanted->{title} : undef;

        if ($chosen && ($movie->{name} ne $chosen))
        {
            # Name returned by tmdb is different to the one the user selected
            # this means they chose a translated name, so use the name they chose
            # as the default
            $info->{title} = $chosen;
        }
        else
        {
            $info->{title} = $movie->{name};
        }

        # Try and guess the series name: what comes before the last colon
        # of the name, when there is one. The capture is only read after a
        # successful match.
        if (defined($movie->{name}) && $movie->{name} =~ /^(.*):/ && $1 ne '')
        {
            $info->{series} = $1;
        }
        else
        {
            $info->{series} = $movie->{name};
        }

        # Set season to 0 and set special flag
        $info->{season} = 0;
        $info->{specialep} = 1;

        $info->{webPage} = $movie->{url};

        # The following fields could be missing from the xml, so we need to check if they're blank
        # (in which case they'll be a reference)
        $info->{synopsis} = $movie->{overview}
            if (!ref($movie->{overview}));
        $info->{ratingpress} = $movie->{rating}
            if (!ref($movie->{rating}));
        $info->{firstaired} = $movie->{released}
            if (!ref($movie->{released}));
        $info->{time} = $movie->{runtime} . " mins"
            if (!ref($movie->{runtime}));

        if (!ref($movie->{certification}) && defined($movie->{certification}))
        {
            # Minimum age for each known certification
            my %ageFor = (
                'Unrated' => 1,
                'Open' => 1,
                'G' => 2,
                'Approved' => 2,
                'PG' => 5,
                'M' => 5,
                'GP' => 5,
                'PG-13' => 13,
                'R' => 17,
                'NC-17' => 18,
                'X' => 18,
            );
            my $certification = $movie->{certification};
            $info->{age} = $ageFor{$certification}
                if exists $ageFor{$certification};
        }

        for my $country (_children($movie->{countries}, 'country'))
        {
            $info->{country} .= $country->{name} . ', ';
        }
        $info->{country} =~ s/, $//;

        # The counter is per movie: without this reset, a second movie
        # loaded with the same plugin object would get its roles stored at
        # the wrong rows, or no actors at all once the limit was reached.
        $self->{actorsCounter} = 0;
        for my $person (_children($movie->{cast}, 'person'))
        {
            my $name = $person->{name};
            # Strip any blank spaces from start and end of name
            $name =~ s/\s*$//;
            $name =~ s/^\s*//;
            if ($person->{job} eq "Director")
            {
                $info->{director} .= $name . ', ';
            }
            elsif ($person->{job} eq "Producer")
            {
                $info->{producer} .= $name . ', ';
            }
            elsif ($person->{job} eq "Music")
            {
                $info->{music} .= $name . ', ';
            }
            elsif ($person->{job} eq "Actor")
            {
                next if $self->{actorsCounter} >= $GCPlugins::GCTVseries::GCTVseriesCommon::MAX_ACTORS;

                my $role = $person->{character};
                $role =~ s/\s*$//;
                $role =~ s/^\s*//;
                push @{$info->{actors}}, [$name, $role];
                $self->{actorsCounter}++;
            }
        }
        $info->{director} =~ s/, $//;
        $info->{producer} =~ s/, $//;
        $info->{music} =~ s/, $//;

        for my $category (_children($movie->{categories}, 'category'))
        {
            push @{$info->{genre}}, [ $category->{name} ]
                if ($category->{type} eq 'genre');
        }

        # Fetch either the big original pic, or just the small thumbnail pic;
        # the first matching poster wins.
        my $wantedSize = $self->{bigPics} ? "original" : "thumb";
        for my $image (_children($movie->{images}, 'image'))
        {
            next if $image->{type} ne "poster";
            next if $image->{size} ne $wantedSize;
            $info->{image} = $image->{url}
                if (!$info->{image});
        }

        # We have to return something as the name, even though this field will get automatically
        # calculated for tv series collections.
        $info->{name} = "temp";
    }

    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title => 1,
            firstaired => 1
        };

        return $self;
    }

    # Returns the api url matching $url, or the site address when no url
    # is given.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        if (!$url)
        {
            # If we're not passed a url, return a hint so that gcstar knows what type
            # of addresses this plugin handles
            $url = "http://www.themoviedb.org";
        }
        elsif (index($url, "api") < 0)
        {
            # Url isn't for the movie db api, so we need to find the movie id
            # (what follows the last slash) and return a url corresponding to
            # the api page for this movie
            my $lastSlash = rindex($url, "/");
            if ($lastSlash >= 0)
            {
                $url = $INFO_URL . substr($url, $lastSlash + 1);
            }
        }
        return $url;
    }

    # The api answers are parsed as they are.
    sub preProcess
    {
        my ($self, $html) = @_;

        return $html;
    }

    sub decodeEntitiesWanted
    {
        return 0;
    }

    sub getSearchUrl
    {
        my ($self, $word) = @_;
        return $SEARCH_URL . $word;
    }

    sub changeUrl
    {
        my ($self, $url) = @_;
        # Make sure the url is for the api, not the main movie page
        return $self->getItemUrl($url);
    }

    sub getName
    {
        return "The Movie DB";
    }

    sub getAuthor
    {
        return 'Zombiepig';
    }

    sub getLang
    {
        return 'EN';
    }

    sub getCharset
    {
        my $self = shift;

        return "UTF-8";
    }

    sub getSearchCharset
    {
        my $self = shift;

        # Need urls to be double character encoded
        return "utf8";
    }

    # Values are used as they come out of the XML parser.
    sub convertCharset
    {
        my ($self, $value) = @_;
        return $value;
    }

    sub getNotConverted
    {
        my $self = shift;
        return [];
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCTVseries/GCTvdb.pm b/lib/gcstar/GCPlugins/GCTVseries/GCTvdb.pm new file mode 100644 index 0000000..ca4dc68 --- /dev/null +++
b/lib/gcstar/GCPlugins/GCTVseries/GCTvdb.pm @@ -0,0 +1,466 @@
package GCPlugins::GCTVseries::GCTvdb;

###################################################
#
# Copyright 2005-2007 Tian
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCTVseries::GCTVseriesCommon;


{
    package GCPlugins::GCTVseries::GCPluginTvdb;

    use base qw(GCPlugins::GCTVseries::GCTVseriesPluginsBase);
    use XML::Simple;
    use Encode;
    use LWP::Simple qw($ua);

    # Address of the thetvdb.com XML api, including the api key
    my $API_URL = 'http://www.thetvdb.com/api/A8CC4AF70D0385F3';

    # Downloads and parses banners.xml for the given series id.
    # Returns a reference to the list of <Banner> entries. Any failure
    # (HTTP error, undecodable body, invalid XML) yields an empty list, so
    # a missing banner never aborts the parsing of the series itself.
    sub _fetchBanners
    {
        my ($self, $xs, $seriesId) = @_;

        my $bannersXml = eval {
            my $response = $ua->get($API_URL . '/series/' . $seriesId . '/banners.xml');
            die "banners request failed\n" if !$response->is_success;
            $xs->XMLin(
                $response->decoded_content,
                ForceArray => ['Banner'],
                KeyAttr    => [],
            );
        };

        return []
            if (ref($bannersXml) ne 'HASH') || (ref($bannersXml->{Banner}) ne 'ARRAY');
        return $bannersXml->{Banner};
    }

    # Returns the url of the first banner attached to season $season,
    # or nothing when there is none.
    sub _findSeasonBanner
    {
        my ($self, $banners, $season) = @_;

        foreach my $banner (@{$banners})
        {
            return "http://thetvdb.com/banners/" . $banner->{BannerPath}
                if ($banner->{Season} == $season);
        }
        return;
    }

    # Builds the list of [ episode number, episode name ] pairs belonging to
    # season $season, sorted by episode number. Returns a reference to that
    # list, or nothing when the season has no episode.
    sub _collectSeasonEpisodes
    {
        my ($self, $episodes, $season) = @_;
        my @seasonEpisodes;

        foreach my $checkEpisode (@{$episodes})
        {
            next
                if !(($checkEpisode->{EpisodeNumber} != 0)
                  || (!ref($checkEpisode->{EpisodeName})));

            # Prefer dvd episode numbers. An empty xml element is returned
            # as a reference, in which case the aired season is used instead
            next
                if !(($checkEpisode->{DVD_season} == $season)
                  || ((ref($checkEpisode->{DVD_season}))
                    && ($checkEpisode->{SeasonNumber} == $season)));

            my $number;
            if (ref($checkEpisode->{DVD_episodenumber}))
            {
                $number = $checkEpisode->{DVD_episodenumber} = $checkEpisode->{EpisodeNumber};
                $number = $checkEpisode->{EpisodeNumber};
            }
            else
            {
                # Keep only the leading digits of the dvd episode number
                ($number) = $checkEpisode->{DVD_episodenumber} =~ /^(\d*)/;
            }
            push @seasonEpisodes, [ $number, $checkEpisode->{EpisodeName} ];
        }

        return if !@seasonEpisodes;
        return [ sort { $a->[0] <=> $b->[0] } @seasonEpisodes ];
    }

    # Splits a "|a|b|c|" style list into its elements.
    sub _splitPipeList
    {
        my ($self, $list) = @_;

        return if !defined($list);
        $list =~ s/^\|//;
        $list =~ s/\|$//;
        return split(/\|/, $list);
    }

    # Returns the address of the web page of the season $episode belongs to.
    sub _seasonUrl
    {
        my ($self, $episode) = @_;

        return "http://www.thetvdb.com/?tab=season&seriesid=" . $episode->{seriesid}
             . "&seasonid=" . $episode->{seasonid}
             . "&lid=" . $self->siteLanguageCode();
    }

    # Parses an xml page returned by the api. Depending on the state:
    #  - pass 1: fills itemsList with the series matching the search
    #  - list parsing on a later pass: fills itemsList with one entry per
    #    season of the series
    #  - direct url (pass is not 2): fills curInfo from the series page,
    #    for the season selected through $self->{seasonid}
    #  - pass 2 item: copies the wanted itemsList entry into curInfo
    # HTML pages are ignored.
    sub parse
    {
        my ($self, $page) = @_;
        return if $page =~ /^<!DOCTYPE html/;
        my $xml;
        my $xs = XML::Simple->new;

        if ($self->{pass} == 1)
        {
            $xml = $xs->XMLin(
                $page,
                ForceArray => ['Series'],
                KeyAttr    => []
            );

            foreach my $series (@{$xml->{Series}})
            {
                $self->{itemIdx}++;
                my $item = ($self->{itemsList}[$self->{itemIdx}] ||= {});
                $item->{nextUrl} = $API_URL . "/series/" . $series->{id}
                                 . "/all/" . $self->siteLanguage() . ".xml";
                $item->{series}     = $series->{SeriesName};
                $item->{firstaired} = $series->{FirstAired};
            }
        }
        elsif ($self->{parsingList})
        {
            # Searching on episodes
            $xml = $xs->XMLin(
                $page,
                ForceArray => ['Episode'],
                KeyAttr    => [],
            );

            # Need to grab the banners info too
            my $banners = $self->_fetchBanners($xs, $xml->{Episode}[0]->{seriesid});

            # One item is created the first time each season is met
            my %seenSeason;
            foreach my $episode (@{$xml->{Episode}})
            {
                my $season = $episode->{SeasonNumber};
                next if $seenSeason{$season}++;

                $self->{itemIdx}++;
                my $item = ($self->{itemsList}[$self->{itemIdx}] ||= {});

                # Fields missing from the xml are returned as references
                $item->{series} = $xml->{Series}->{SeriesName}
                    if (!ref($xml->{Series}->{SeriesName}));
                $item->{season} = $season;
                $item->{overview} = $xml->{Series}->{Overview}
                    if (!ref($xml->{Series}->{Overview}));

                # Find banner
                my $image = $self->_findSeasonBanner($banners, $season);
                $item->{image} ||= $image if defined($image);

                # Episodes
                my $seasonEpisodes = $self->_collectSeasonEpisodes($xml->{Episode}, $season);
                @{$item->{episodes}} = @{$seasonEpisodes} if $seasonEpisodes;

                $item->{firstaired} = $xml->{Series}->{FirstAired}
                    if (!ref($xml->{Series}->{FirstAired}));
                $item->{actors} = $xml->{Series}->{Actors}
                    if (!ref($xml->{Series}->{Actors}));
                $item->{genre} = $xml->{Series}->{Genre}
                    if (!ref($xml->{Series}->{Genre}));
                $item->{runtime} = $xml->{Series}->{Runtime}
                    if (!ref($xml->{Series}->{Runtime}));
                $item->{url} = $self->_seasonUrl($episode);
            }
        }
        elsif ($self->{pass} != 2)
        {
            # Process a given url
            $xml = $xs->XMLin(
                $page,
                ForceArray => ['Episode'],
                KeyAttr    => [],
            );

            # Need to grab the banners info too
            my $banners = $self->_fetchBanners($xs, $self->{seriesid});
            my $info = ($self->{curInfo} ||= {});

            $info->{series} = $xml->{Series}->{SeriesName}
                if (!ref($xml->{Series}->{SeriesName}));
            $info->{synopsis} = $xml->{Series}->{Overview}
                if (!ref($xml->{Series}->{Overview}));
            $info->{firstaired} = $xml->{Series}->{FirstAired}
                if (!ref($xml->{Series}->{FirstAired}));
            $info->{time} = $xml->{Series}->{Runtime}
                if (!ref($xml->{Series}->{Runtime}));

            if (!ref($xml->{Series}->{Actors}))
            {
                push @{$info->{actors}}, [$_]
                    foreach $self->_splitPipeList($xml->{Series}->{Actors});
            }

            if (!ref($xml->{Series}->{Genre}))
            {
                push @{$info->{genre}}, [$_]
                    foreach $self->_splitPipeList($xml->{Series}->{Genre});
            }

            # Find corresponding season number
            foreach my $episode (@{$xml->{Episode}})
            {
                if (($episode->{seasonid} == $self->{seasonid})
                 && (!$info->{season}))
                {
                    $info->{season}  = $episode->{SeasonNumber};
                    $info->{webPage} = $self->_seasonUrl($episode);
                }
            }

            # Episodes
            my $seasonEpisodes = $self->_collectSeasonEpisodes($xml->{Episode}, $info->{season});
            @{$info->{episodes}} = @{$seasonEpisodes} if $seasonEpisodes;

            # Find banner
            my $image = $self->_findSeasonBanner($banners, $info->{season});
            $info->{image} ||= $image if defined($image);

            # We have to return something as the name
            $info->{name} = "temp";
        }
        else
        {
            # Everything was already gathered while parsing the list
            my $wanted = ($self->{itemsList}[$self->{wantedIdx}] ||= {});
            my $info   = ($self->{curInfo} ||= {});

            $info->{season}   = $wanted->{season};
            $info->{episode}  = $wanted->{episode};
            $info->{name}     = $wanted->{name};
            $info->{series}   = $wanted->{series};
            $info->{director} = $wanted->{director};
            $info->{director} =~ s/^\|//;
            $info->{director} =~ s/\|$//;
            $info->{firstaired} = $wanted->{firstaired};
            $info->{writer}     = $wanted->{writer};
            $info->{writer} =~ s/^\|//;
            $info->{writer} =~ s/\|$//;

            push @{$info->{actors}}, [$_]
                foreach $self->_splitPipeList($wanted->{actors});
            push @{$info->{genre}}, [$_]
                foreach $self->_splitPipeList($wanted->{genre});

            $info->{time}     = $wanted->{runtime};
            $info->{image}    = $wanted->{image};
            $info->{synopsis} = $wanted->{overview};
            $info->{webPage}  = $wanted->{url};
            $info->{episodes} = $wanted->{episodes};
        }
    }

    # Constructor
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = $class->SUPER::new();
        bless ($self, $class);

        $self->{curName} = undef;
        $self->{curUrl} = undef;

        return $self;
    }

    # The downloaded page is handed to the parser untouched.
    sub preProcess
    {
        my ($self, $html) = @_;

        return $html;
    }

    # Returns the api url used to search for series named $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;
        return "http://www.thetvdb.com/api/GetSeries.php?seriesname=$word&language=".$self->siteLanguage();
    }

    # Maps a url to the matching api url.
    #  - no url given: returns the site address as a hint of which
    #    addresses this plugin handles
    #  - url without "api" in it: remembers its seriesid and seasonid
    #    parameters (undef when absent) and returns the api url of the series
    #  - anything else is returned unchanged
    sub getItemUrl
    {
        my ($self, $url) = @_;
        if (!$url)
        {
            # If we're not passed a url, return a hint so that gcstar knows what type
            # of addresses this plugin handles
            $url = "http://www.thetvdb.com";
        }
        elsif (index($url, "api") < 0)
        {
            # Url isn't for the tvdb api, so we need to find the series and
            # season ids and return a url corresponding to the api page.
            # Captures are taken in list context, so that a parameter which
            # is absent gives undef instead of a stale $1 from an earlier match
            ($self->{seriesid}) = $url =~ /[\?&]seriesid=([0-9]+)/;
            ($self->{seasonid}) = $url =~ /[\?&]seasonid=([0-9]+)/;
            $url = $API_URL . "/series/" . $self->{seriesid}
                 . "/all/" . $self->siteLanguage() . ".xml";
        }
        return $url;
    }

    # Make sure the url is for the api, not the main movie page.
    sub changeUrl
    {
        my ($self, $url) = @_;
        return $self->getItemUrl($url);
    }

    sub getNumberPasses
    {
        return 2;
    }

    sub getName
    {
        return "Tvdb";
    }

    sub needsLanguageTest
    {
        return 1;
    }

    # Tells whether the "lid" parameter of $url is the language code of
    # this plugin. Urls without that parameter never match.
    sub testURL
    {
        my ($self, $url) = @_;
        my ($id) = $url =~ /[\?&]lid=([0-9]+)/;
        return (defined($id) && ($id == $self->siteLanguageCode()));
    }

    # Declares the fields available in the results list of the current pass.
    sub getReturnedFields
    {
        my $self = shift;

        if ($self->{pass} == 1)
        {
            $self->{hasField} = {
                series => 1,
                firstaired => 1,
            };
        }
        else
        {
            $self->{hasField} = {
                series => 1,
                season => 1
            };
        }
    }

    sub getAuthor
    {
        return 'Zombiepig';
    }

    sub getLang
    {
        return 'EN';
    }

    sub isPreferred
    {
        return 1;
    }

    sub getSearchCharset
    {
        my $self = shift;

        # Need urls to be double character encoded
        return "utf8";
    }

    sub getCharset
    {
        my $self = shift;

        return "UTF-8";
    }

    sub decodeEntitiesWanted
    {
        return 0;
    }

    # Language abbreviation used in the api urls
    sub siteLanguage
    {
        my $self = shift;

        return 'en';
    }

    # Values are returned as they are, without charset conversion.
    sub convertCharset
    {
        my ($self, $value) = @_;
        return $value;
    }

    # Numeric language id used in the "lid" parameter of the site urls
    sub siteLanguageCode
    {
        my $self = shift;

        return 7;
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCTVseries/GCTvdbES.pm
b/lib/gcstar/GCPlugins/GCTVseries/GCTvdbES.pm new file mode 100644 index 0000000..3943bf0 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCTVseries/GCTvdbES.pm @@ -0,0 +1,61 @@
package GCPlugins::GCTVseries::GCTvdbES;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCTVseries::GCTvdb;


{
    # Spanish flavour of the Tvdb plugin: everything is inherited from
    # GCPluginTvdb, only the identification and language values differ.
    package GCPlugins::GCTVseries::GCPluginTvdbES;

    use base qw(GCPlugins::GCTVseries::GCPluginTvdb);

    # Language tag of this plugin
    sub getLang { return 'ES'; }

    # Name of this plugin
    sub getName { return 'Tvdb ES'; }

    # Language abbreviation for this variant
    sub siteLanguage { return 'es'; }

    # Numeric language code for this variant
    sub siteLanguageCode { return 16; }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCTVseries/GCTvdbFR.pm b/lib/gcstar/GCPlugins/GCTVseries/GCTvdbFR.pm new file mode 100644 index 0000000..6590f30 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCTVseries/GCTvdbFR.pm @@ -0,0 +1,61 @@
package GCPlugins::GCTVseries::GCTvdbFR;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCTVseries::GCTvdb;


{
    # French flavour of the Tvdb plugin: everything is inherited from
    # GCPluginTvdb, only the identification and language values differ.
    package GCPlugins::GCTVseries::GCPluginTvdbFR;

    use base qw(GCPlugins::GCTVseries::GCPluginTvdb);

    # Language tag of this plugin
    sub getLang { return 'FR'; }

    # Name of this plugin
    sub getName { return 'Tvdb FR'; }

    # Language abbreviation for this variant
    sub siteLanguage { return 'fr'; }

    # Numeric language code for this variant
    sub siteLanguageCode { return 17; }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCTVseries/GCTvdbIT.pm b/lib/gcstar/GCPlugins/GCTVseries/GCTvdbIT.pm new file mode 100644 index 0000000..cc4da08 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCTVseries/GCTvdbIT.pm @@ -0,0 +1,60 @@
package GCPlugins::GCTVseries::GCTvdbIT;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCTVseries::GCTvdb;


{
    # Italian flavour of the Tvdb plugin: everything is inherited from
    # GCPluginTvdb, only the identification and language values differ.
    package GCPlugins::GCTVseries::GCPluginTvdbIT;

    use base qw(GCPlugins::GCTVseries::GCPluginTvdb);

    # Language tag of this plugin
    sub getLang { return 'IT'; }

    # Name of this plugin
    sub getName { return 'Tvdb IT'; }

    # Language abbreviation for this variant
    sub siteLanguage { return 'it'; }

    # Numeric language code for this variant
    sub siteLanguageCode { return 15; }
}

1;
diff --git a/lib/gcstar/GCPlugins/GCboardgames/GCReservoirJeux.pm b/lib/gcstar/GCPlugins/GCboardgames/GCReservoirJeux.pm new file mode 100644 index 0000000..a1a4500 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCboardgames/GCReservoirJeux.pm @@ -0,0 +1,418 @@
package GCPlugins::GCboardgames::GCReservoirJeux;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCboardgames::GCboardgamesCommon;

{
    package GCPlugins::GCboardgames::GCPluginReservoirJeux;

    use base qw(GCPlugins::GCboardgames::GCboardgamesPluginsBase);

    # Called for every opening tag. While parsing a results list it
    # records the url of each result; while parsing an item page it grabs
    # the values stored in attributes and raises the inside* flags telling
    # text() where the next piece of text has to be stored.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        return if $self->{parsingEnded};

        if ($self->{parsingList})
        {
            # Parse the search results here
            if (($tagname eq "h3") && ($attr->{class} =~ /^rusearch_result/))
            {
                $self->{itemIdx}++;
                $self->{isBoardgame} = 1;
                $self->{insideName} = 1;
            }
            if ($self->{isBoardgame})
            {
                if (($tagname eq "a") && ($attr->{href} ne "#") && ($attr->{class} =~ /^lien_item/))
                {
                    $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
                    $self->{isBoardgame} = 0;
                }
            }
        }
        else
        {
            # Parse the items page here. Basically we do this by searching for tags which match
            # certain criteria, then preparing to grab the text inside these tags

            if ($tagname eq "h1")
            {
                $self->{insideName} = 1;
            }
            elsif ($tagname eq "div")
            {
                if ($attr->{id} eq "fiche_technique_image")
                {
                    $self->{insideImage} = 1;
                }
                elsif ($attr->{id} eq "bloc_centre_extensions")
                {
                    $self->{insideExpansionList} = 1;
                }
                elsif ($attr->{id} eq "bloc_centre_extensions_bottom")
                {
                    $self->{insideExpansionList} = 0;
                }
                elsif ($attr->{class} eq "fiche_technique_sep")
                {
                    # A separator closes the current categories/mechanics row
                    $self->{insideCategoryRow} = 0;
                    $self->{insideMechanicRow} = 0;
                }
            }
            elsif ($tagname eq "img")
            {
                if ($self->{insideImage})
                {
                    # Only the first picture is kept
                    $self->{curInfo}->{boxpic} = "http://www.reservoir-jeux.com".$attr->{src}
                        if ! $self->{curInfo}->{boxpic};
                    $self->{insideImage} = 0;
                }
                if ($self->{insideExpansionList})
                {
                    $self->{curInfo}->{expandedby} .= $attr->{alt}.',';
                }
            }
            elsif ($tagname eq "a")
            {
                if ($attr->{class} eq "lien_item")
                {
                    if ($self->{nextIsExpands})
                    {
                        $self->{insideExpands} = 1;
                        $self->{nextIsExpands} = 0;
                    }

                    # The kind of link is found in its target
                    if ($attr->{href} =~ /type=editeur/)
                    {
                        $self->{insidePublisher} = 1;
                    }
                    elsif ($attr->{href} =~ /type=auteur/)
                    {
                        $self->{insideDesigner} = 1;
                    }
                    elsif ($attr->{href} =~ /type=illustrateur/)
                    {
                        $self->{insideIllustrator} = 1;
                    }
                    elsif ($attr->{href} =~ /tag_id=/)
                    {
                        if ($self->{insideMechanicRow})
                        {
                            $self->{insideMechanic} = 1;
                        }
                        elsif ($self->{insideCategoryRow})
                        {
                            $self->{insideCategory} = 1;
                        }
                    }
                }
            }
            elsif (($tagname eq "span") && ($attr->{class} eq "prod_description"))
            {
                $self->{insideDescription} = 1;
            }

            if ($tagname eq "br")
            {
                # End of a row of names: drop the separator left after the last one
                if ($self->{insideDesignerRow})
                {
                    $self->{curInfo}->{designedby} =~ s/\s\x2d\s$//g;
                    $self->{insideDesignerRow} = 0;
                }
                if ($self->{insideIllustratorRow})
                {
                    $self->{curInfo}->{illustratedby} =~ s/\s\x2d\s$//g;
                    $self->{insideIllustratorRow} = 0;
                }
            }

            if ($self->{insideDescription})
            {
                if (($tagname eq "br") || ($tagname eq "p"))
                {
                    # neatens up the description a little by starting new line on br tags
                    $self->{curInfo}->{description} .= "\n";
                }
                elsif ($tagname eq "li")
                {
                    # basic formatting of lists
                    $self->{curInfo}->{description} .= " - ";
                }
            }
        }
    }

    # Called for every closing tag.
    sub end
    {
        my ($self, $tagname) = @_;
        $self->{inside}->{$tagname}--;

        if ($self->{insideTechnicalDetails} && $tagname eq "div")
        {
            $self->{insideTechnicalDetails} = 0;
        }
    }

    # Called for every piece of text. Stores it in the field selected by
    # the flags raised in start(), and detects the labels announcing the
    # next values. Texts shorter than 2 characters are ignored.
    sub text
    {
        my ($self, $origtext) = @_;

        return if (length($origtext) < 2);

        # NOTE(review): the substitutions replacing a character by itself
        # have no effect as written; they look like entity replacements
        # whose left-hand side was lost - confirm against the original file.
        $origtext =~ s/"/"/g;
        $origtext =~ s/³/3/g;
        $origtext =~ s/\n//g;
        # Remove indentation (2 or more leading blanks)
        $origtext =~ s/^\s{2,}//;
        #French accents substitution
        $origtext =~ s/à/à/;
        $origtext =~ s/À/À/;
        $origtext =~ s/é/é/;

        return if ($self->{parsingEnded});

        if ($self->{parsingList})
        {
            if ($self->{isBoardgame} && $self->{insideName})
            {
                $self->{itemsList}[$self->{itemIdx}]->{name} = $origtext;
                $self->{insideName} = 0;
            }
        }
        else
        {
            # Parse the text items page here.

            if ($self->{insideName})
            {
                $self->{curInfo}->{name} = $origtext;
                $self->{curInfo}->{name} =~ s/^\s+//;
                $self->{curInfo}->{name} =~ s/\s+\Z//;
                $self->{insideName} = 0;
            }
            if ($self->{inside}->{h2})
            {
                # Section titles tell which kind of values follows
                if ($origtext =~ /^Fiche technique/)
                {
                    $self->{insideTechnicalDetails} = 1;
                }
                elsif ($origtext =~ /^M\xe9canismes/)
                {
                    $self->{insideMechanicRow} = 1;
                }
                elsif ($origtext =~ /^Th\xe8mes/)
                {
                    $self->{insideCategoryRow} = 1;
                }
            }
            if ($self->{insideTechnicalDetails})
            {
                if ($origtext =~ /^Date de sortie/)
                {
                    $self->{curInfo}->{released} = $origtext;
                    $self->{curInfo}->{released} =~ s/Date de sortie : //g;
                }
                elsif ($origtext =~ /Dur\xe9e : /)
                {
                    $self->{curInfo}->{playingtime} = $origtext;
                    $self->{curInfo}->{playingtime} =~ s/\s*Dur\xe9e : //g;
                }
                elsif ($origtext =~ /\xc0 partir de\s[0-9]*\sans/)
                {
                    $self->{curInfo}->{suggestedage} = $origtext;
                    $self->{curInfo}->{suggestedage} =~ s/^\s*//g;
                }
                elsif ($origtext =~ /De [0-9]* \xe0 [0-9]* joueurs/)
                {
                    $self->{curInfo}->{players} = $origtext;
                    $self->{curInfo}->{players} =~ s/^\s*De //g;
                    $self->{curInfo}->{players} =~ s/ joueurs//g;
                }
            }
            if ($self->{insideDesigner})
            {
                # Append text (and trailing " - ") to existing designer field
                $self->{curInfo}->{designedby} .= $origtext." - ";
                $self->{insideDesigner} = 0;
            }
            if ($self->{insideIllustrator})
            {
                # Append text (and trailing " - ") to existing illustrator field
                $self->{curInfo}->{illustratedby} .= $origtext." - ";
                $self->{insideIllustrator} = 0;
            }
            if ($self->{insidePublisher})
            {
                $self->{curInfo}->{publishedby} = $origtext;
                $self->{insidePublisher} = 0;
            }
            if ($self->{insideExpands})
            {
                $self->{curInfo}->{expansionfor} = $origtext;
                $self->{insideExpands} = 0;
            }
            if ($self->{insideMechanic})
            {
                $self->{curInfo}->{mechanics} .= $self->capWord($origtext).',';
                $self->{insideMechanic} = 0;
            }
            if ($self->{insideCategory})
            {
                $self->{curInfo}->{category} .= $self->capWord($origtext).',';
                $self->{insideCategory} = 0;
            }

            # Labels announcing the values found in the following tags
            if ($origtext =~ /^\s*Auteur(s)? : /)
            {
                $self->{insideDesignerRow} = 1;
            }
            if ($origtext =~ /^\s*Illustrateur(s)? : /)
            {
                $self->{insideIllustratorRow} = 1;
            }
            if ($origtext =~ /^Ce produit est une extension de :/)
            {
                $self->{nextIsExpands} = 1;
            }
            if ($self->{insideDescription})
            {
                $self->{curInfo}->{description} .= $origtext;
            }
        }
    }

    # Called for every html comment. On an item page, a comment containing
    # "/div" ends the description.
    sub comment
    {
        my ($self, $comment) = @_;

        return if $self->{parsingList};

        if (($comment =~ /\/div/) && $self->{insideDescription})
        {
            $self->{insideDescription} = 0;
            # remove spaces from start and end of description
            $self->{curInfo}->{description} =~ s/^\s+//;
            $self->{curInfo}->{description} =~ s/\s+$//;
        }
    }

    # Constructor
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            name => 1,
        };

        $self->{isBoardgame} = 0;
        $self->{curName} = undef;
        $self->{curUrl} = undef;

        return $self;
    }

    # Prepares a downloaded page before it is parsed.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;

        # NOTE(review): these two patterns are identical as written, so the
        # second one only matches what the first one leaves behind; one side
        # of each was presumably an entity - confirm against the original file.
        $html =~ s/""/'"/g;
        $html =~ s/""/"'/g;
        $html =~ s|</a></b><br>|</a><br>|;

        return $html;
    }

    sub getSearchUrl
    {
        my ($self, $word) = @_;

        # Url returned below is the for the search page, where $word is replaced by the search
        return ('http://www.reservoir-jeux.com/recherche.php', ['search' => $word, 'secteurid' => '-1', 'dv'
=> '30']); + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url if $url =~ /^http:/; + if ($url =~ /^\//) + { + return "http://www.reservoir-jeux.com".$url; + } + else + { + return "http://www.reservoir-jeux.com/".$url; + } + } + + sub getName + { + return "Reservoir Jeux"; + } + + sub getAuthor + { + return 'Florent'; + } + + sub getLang + { + return 'FR'; + } + + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCboardgames/GCboardgamegeek.pm b/lib/gcstar/GCPlugins/GCboardgames/GCboardgamegeek.pm new file mode 100644 index 0000000..038198f --- /dev/null +++ b/lib/gcstar/GCPlugins/GCboardgames/GCboardgamegeek.pm @@ -0,0 +1,278 @@ +package GCPlugins::GCboardgames::GCboardgamegeek; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCboardgames::GCboardgamesCommon;
+
+{
+    package GCPlugins::GCboardgames::GCPluginboardgamegeek;
+
+    use base qw(GCPlugins::GCboardgames::GCboardgamesPluginsBase);
+    use XML::Simple;
+    use HTML::Entities;
+    use Encode;
+
+    # Parses an XML answer of the boardgamegeek api: either a search result
+    # (fills itemsList) or a single game (fills curInfo). Pages starting
+    # with an HTML doctype are ignored.
+    sub parse
+    {
+        my ($self, $page) = @_;
+        return if $page =~ /^<!DOCTYPE html/;
+        my $xml;
+        my $xs = XML::Simple->new;
+
+        if ($self->{parsingList})
+        {
+            $xml = $xs->XMLin($page, ForceArray => ['boardgame'], KeyAttr => ['objectid']);
+            my $game;
+            foreach $game ( keys( %{ $xml -> {'boardgame'}} ) )
+            {
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.boardgamegeek.com/xmlapi/boardgame/".$game;
+                # Better check how the name is returned, the bgg api can be a little funny here
+                if (ref($xml->{'boardgame'}->{$game}->{'name'}))
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{name} = $xml->{'boardgame'}->{$game}->{'name'}->{'content'};
+                }
+                else
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{name} = $xml->{'boardgame'}->{$game}->{'name'};
+                }
+
+                if (!ref($xml->{'boardgame'}->{$game}->{'yearpublished'}))
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{released} = $xml->{'boardgame'}->{$game}->{'yearpublished'};
+                }
+            }
+        }
+        else
+        {
+            $xml = $xs->XMLin($page, ForceArray => ['name','boardgamedesigner','boardgameartist','boardgamepublisher',
+                                                    'boardgamecategory','boardgamemechanic','boardgameexpansion'],
+                                     KeyAttr => []);
+
+            $self->{curInfo}->{released} = $xml->{boardgame}->{yearpublished};
+            $self->{curInfo}->{released} =~ s/([^0-9])//g;
+            $self->{curInfo}->{players} = $xml->{boardgame}->{minplayers}."-".$xml->{boardgame}->{maxplayers};
+            $self->{curInfo}->{playingtime} = $xml->{boardgame}->{playingtime}." mins";
+            $self->{curInfo}->{suggestedage} = $xml->{boardgame}->{age};
+
+            my $primaryName = "";
+            for my $name (@{$xml->{boardgame}->{name}})
+            {
+                $primaryName = $name->{content}
+                    if $name->{primary} eq "true";
+            }
+
+            if (($primaryName ne $self->{itemsList}[$self->{wantedIdx}]->{name})
+             && ($self->{itemsList}[$self->{wantedIdx}]->{name}))
+            {
+                # Name returned by boardgamegeek is different to the one the user selected
+                # this means they chose a translated name, so use the name they chose
+                # as the default, and put boardgamegeek's name in as the original (untranslated) name of the game
+                $self->{curInfo}->{name} = $self->{itemsList}[$self->{wantedIdx}]->{name};
+                $self->{curInfo}->{original} = $primaryName;
+            }
+            else
+            {
+                $self->{curInfo}->{name} = $primaryName;
+            }
+
+            # Have to decode the html type characters here
+            $self->{curInfo}->{description} = decode_entities($xml->{boardgame}->{description});
+            $self->{curInfo}->{description} =~ s/\<br\/>/\n/g;
+            $self->{curInfo}->{description} =~ s/<.*?>//g;
+
+            if ($self->{bigPics})
+            {
+                $self->{curInfo}->{boxpic} = $xml->{boardgame}->{image};
+            }
+            else
+            {
+                $self->{curInfo}->{boxpic} = $xml->{boardgame}->{thumbnail};
+            }
+
+            for my $designer (@{$xml->{boardgame}->{boardgamedesigner}})
+            {
+                $self->{curInfo}->{designedby} .= $designer->{content}.', ';
+            }
+            $self->{curInfo}->{designedby} =~ s/, $//;
+
+            for my $artist (@{$xml->{boardgame}->{boardgameartist}})
+            {
+                $self->{curInfo}->{illustratedby} .= $artist->{content}.', ';
+            }
+            $self->{curInfo}->{illustratedby} =~ s/, $//;
+
+            for my $publisher (@{$xml->{boardgame}->{boardgamepublisher}})
+            {
+                $self->{curInfo}->{publishedby} .= $publisher->{content}.', ';
+            }
+            $self->{curInfo}->{publishedby} =~ s/, $//;
+
+            for my $category (@{$xml->{boardgame}->{boardgamecategory}})
+            {
+                push @{$self->{curInfo}->{category}}, [$category->{content}];
+            }
+
+            for my $mechanic (@{$xml->{boardgame}->{boardgamemechanic}})
+            {
+                push @{$self->{curInfo}->{mechanics}}, [$mechanic->{content}];
+            }
+
+            for my $expansion (@{$xml->{boardgame}->{boardgameexpansion}})
+            {
+                # Entries flagged "inbound" go to expansionfor, the others to expandedby
+                if ($expansion->{inbound})
+                {
+                    if ($self->{curInfo}->{expansionfor})
+                    {
+                        $self->{curInfo}->{expansionfor} .= ", ";
+                    }
+                    $self->{curInfo}->{expansionfor} .= $expansion->{content};
+                }
+                else
+                {
+                    push @{$self->{curInfo}->{expandedby}}, [$expansion->{content}];
+                }
+            }
+            $self->{curInfo}->{web} = "http://boardgamegeek.com/boardgame/".$xml->{boardgame}->{objectid};
+        }
+    }
+
+    # Constructor: declares which fields the search results list provides.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            name => 1,
+            released => 1,
+        };
+
+        return $self;
+    }
+
+    # Converts a boardgamegeek url into the matching api url.
+    # Without argument, returns the site address as a hint for gcstar.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        if (!$url)
+        {
+            # If we're not passed a url, return a hint so that gcstar knows what type
+            # of addresses this plugin handles
+            $url = "http://www.boardgamegeek.com";
+        }
+        elsif (index($url,"xmlapi") < 0)
+        {
+            # Url isn't for the bgg api, so we need to find the game id
+            # and return a url corresponding to the api page for this game.
+            # The capture is only used when the match succeeded: after a failed
+            # match $1 still holds the value of an earlier match (or nothing),
+            # so a url without numeric id is returned unchanged.
+            if ($url =~ /\/([0-9]+)[\/]*/)
+            {
+                my $id = $1;
+                $url = "http://www.boardgamegeek.com/xmlapi/boardgame/".$id;
+            }
+        }
+        return $url;
+    }
+
+    # Nothing to clean: the api answer is parsed as is.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        return $html;
+    }
+
+    sub decodeEntitiesWanted
+    {
+        return 0;
+    }
+
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+        # Quick and dirty fixes because the bgg api struggles with some words. Should not be required anymore (7/6/2010)
+        # $word =~ s/the\+/\+/ig;
+        # $word =~ s/\+and+/\+/g;
+        # $word =~ s/\+of\+/\+/g;
+
+        return "http://www.boardgamegeek.com/xmlapi/search?search=$word";
+    }
+
+    sub changeUrl
+    {
+        my ($self, $url) = @_;
+        # Make sure the url is for the api, not the main movie page
+        return $self->getItemUrl($url);
+    }
+
+    sub getName
+    {
+        return "Board Game Geek";
+    }
+
+    sub getAuthor
+    {
+        return 'Zombiepig';
+    }
+
+    sub isPreferred
+    {
+        return 1;
+    }
+
+    sub getLang
+    {
+        return 'EN';
+    }
+
+    sub getCharset
+    {
+        my $self = shift;
+
+        return "UTF-8";
+    }
+
+    sub getSearchCharset
+    {
+        my $self = shift;
+
+        # Need urls to be double character encoded
+        return "utf8";
+    }
+
+    # Values are returned untouched.
+    sub convertCharset
+    {
+        my ($self, $value) = @_;
+        return $value;
+    }
+
+    sub getNotConverted
+    {
+        my $self = shift;
+        return [];
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCboardgames/GCboardgamesCommon.pm b/lib/gcstar/GCPlugins/GCboardgames/GCboardgamesCommon.pm
new file mode 100644
index 0000000..088f077
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCboardgames/GCboardgamesCommon.pm
@@ -0,0 +1,58 @@
+package GCPlugins::GCboardgames::GCboardgamesCommon;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCPluginsBase;
+
+{
+    package GCPlugins::GCboardgames::GCboardgamesPluginsBase;
+
+    use base qw(GCPluginParser);
+
+    # Builds the parser through the parent constructor and re-blesses it
+    # into the requested class.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self = $class->SUPER::new();
+
+        return bless($self, $class);
+    }
+
+    # Board games can only be searched by name.
+    sub getSearchFieldsArray
+    {
+        my @fields = ('name');
+        return \@fields;
+    }
+
+    # Loads the given url through the parent class, then hands back
+    # the information gathered in curInfo.
+    sub loadUrl
+    {
+        my $self = shift;
+        my $url = shift;
+
+        $self->SUPER::loadUrl($url);
+
+        return $self->{curInfo};
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCboardgames/GCtrictrac.pm b/lib/gcstar/GCPlugins/GCboardgames/GCtrictrac.pm
new file mode 100644
index 0000000..e30e189
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCboardgames/GCtrictrac.pm
@@ -0,0 +1,462 @@
+package GCPlugins::GCboardgames::GCtrictrac;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCboardgames::GCboardgamesCommon;
+
+{
+    package GCPlugins::GCboardgames::GCPlugintrictrac;
+
+    use base qw(GCPlugins::GCboardgames::GCboardgamesPluginsBase);
+
+    # Start-tag handler. In list mode it collects one entry per search result;
+    # in item mode it raises the "inside*" flags that text() consumes.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingEnded})
+        {
+            return;
+        }
+
+        if ($self->{parsingList})
+        {
+            # Parse the search results here
+
+            # Check if we are currently parsing an item page, not a search results page (ie - exact match has
+            # taken us straight to the page). Do this by checking if there is a heading on the page
+            if (($tagname eq "font") && ($attr->{style} =~ /FONT-SIZE: 20px/))
+            {
+                # Stop parsing results, switch to item parsing
+                $self->{parsingEnded} = 1;
+                $self->{itemIdx} = 0;
+                $self->{itemsList}[0]->{url} = $self->{loadedUrl};
+            }
+
+            # Quite easy to parse the search results page since all the information we need (url, title, year)
+            # is contained within the <a> tag for the image of each search result
+
+            # TODO - check how search results look when they do not have an image??
+
+            # Check if tag is an <a>, the url referenced is valid (not "#"), and the onmouseover text looks right
+            if (($tagname eq "a") && ($attr->{href} ne "#") && ($attr->{onmouseover} =~ /^(return overlib)/))
+            {
+
+                # Add to search results
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+
+                my $mouseoverText = $attr->{onmouseover};
+
+                # Parse some regular expressions to find the name and release date
+                if ($mouseoverText =~ /<b>(.+)<\/b>/)
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{name} = $1;
+                }
+                if ($mouseoverText =~ /<\/b> \((\d+)\)/)
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{released} = $1;
+                }
+            }
+        }
+        else
+        {
+            # Parse the items page here. Basically we do this by searching for tags which match certain criteria,
+            # then preparing to grab the text inside these tags
+
+            if (($tagname eq "font") && ($attr->{style} =~ /FONT-SIZE: 20px/))
+            {
+                $self->{insideName} = 1;
+            }
+            elsif (($tagname eq "font") && ($attr->{style} =~ /FONT-SIZE: 12px/))
+            {
+                if ($self->{nextIsPlayers})
+                {
+                    $self->{insidePlayers} = 1;
+                    $self->{nextIsPlayers} = 0;
+                }
+                if ($self->{nextIsAges})
+                {
+                    $self->{insideAges} = 1;
+                    $self->{nextIsAges} = 0;
+                }
+                if ($self->{nextIsPlayingTime})
+                {
+                    $self->{insidePlayingTime} = 1;
+                    $self->{nextIsPlayingTime} = 0;
+                }
+
+            }
+            elsif (($tagname eq "td") && ($attr->{height} eq "250") && ($attr->{width} eq "250"))
+            {
+                $self->{insideImage} = 1;
+            }
+            elsif ($tagname eq "img")
+            {
+                if ($self->{insideImage})
+                {
+                    # Only the first image found is kept as the box picture
+                    $self->{curInfo}->{boxpic} = "http://trictrac.net".$attr->{src} if ! $self->{curInfo}->{boxpic};
+                    $self->{insideImage} = 0;
+                }
+            }
+            elsif ($tagname eq "a")
+            {
+                if ($self->{nextIsYear})
+                {
+                    $self->{insideYear} = 1;
+                    $self->{nextIsYear} = 0;
+                }
+                if ($self->{insideDesignerRow})
+                {
+                    $self->{insideDesigner} = 1;
+                }
+                if ($self->{insideIllustratorRow})
+                {
+                    $self->{insideIllustrator} = 1;
+                }
+                if ($self->{nextIsPublishers})
+                {
+                    $self->{insidePublishers} = 1;
+                    $self->{nextIsPublishers} = 0;
+                }
+                if ($self->{insideMechanicRow})
+                {
+                    $self->{insideMechanic} = 1;
+                }
+                if ($self->{insideCategoryRow})
+                {
+                    $self->{insideCategory} = 1;
+                }
+
+            }
+            elsif ($tagname eq "b")
+            {
+                if ($self->{insideExpansionList})
+                {
+                    $self->{insideExpansion} = 1;
+                }
+            }
+            elsif (($tagname eq "p") && ( $attr->{style} =~ /TEXT-ALIGN: justify/))
+            {
+                $self->{insideDescription} = 1;
+            }
+            if ($self->{insideDescription})
+            {
+                if ($tagname eq "br")
+                {
+                    # neatens up the description a little by starting new line on br tags
+                    $self->{curInfo}->{description} .= "\n";
+                }
+                elsif ($tagname eq "li")
+                {
+                    # basic formatting of lists
+                    $self->{curInfo}->{description} .= " - ";
+                }
+            }
+        }
+    }
+
+    # End-tag handler: maintains the nesting counter and closes the rows,
+    # lists and description opened by start() / text().
+    sub end
+    {
+        my ($self, $tagname) = @_;
+        $self->{inside}->{$tagname}--;
+        if ($tagname eq "tr")
+        {
+            if ($self->{insideDesignerRow})
+            {
+                # Use regex to strip final , off end of line
+                $self->{curInfo}->{designedby} =~ s/(, )$//;
+                $self->{insideDesignerRow} = 0;
+            }
+            if ($self->{insideIllustratorRow})
+            {
+                # Use regex to strip final , off end of line
+                $self->{curInfo}->{illustratedby} =~ s/(, )$//;
+                $self->{insideIllustratorRow} = 0;
+            }
+            if ($self->{insideMechanicRow})
+            {
+                $self->{insideMechanicRow} = 0;
+            }
+            if ($self->{insideCategoryRow})
+            {
+                $self->{insideCategoryRow} = 0;
+            }
+        }
+        elsif ($tagname eq "table")
+        {
+            if ($self->{insideExpansionList})
+            {
+                $self->{insideExpansionList} = 0;
+            }
+        }
+        elsif ($tagname eq "b")
+        {
+            if ($self->{insideExpands})
+            {
+                $self->{curInfo}->{expansionfor} =~ s/"//g;
+                $self->{insideExpands} = 0;
+            }
+        }
+        elsif (($tagname eq "td") && ($self->{insideDescription}))
+        {
+            $self->{insideDescription} = 0;
+            # remove spaces from start and end of description
+            $self->{curInfo}->{description} =~ s/^\s+//;
+            $self->{curInfo}->{description} =~ s/\s+$//;
+        }
+    }
+
+    # Text handler: stores the text into the field selected by the flags
+    # raised in start(), and detects the labels announcing the next field.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        return if (length($origtext) < 2);
+
+        # NOTE(review): several substitutions below show the same text as pattern
+        # and replacement in this copy - presumably HTML entities were decoded in
+        # transit; confirm the patterns against the upstream file.
+        $origtext =~ s/"/"/g;
+        $origtext =~ s/³/3/g;
+        $origtext =~ s/\n//g;
+        # Strip runs of two or more leading blanks. The quantifier was written
+        # without its closing brace ("\s{2,"), which newer perls either treat
+        # as a literal "{2," or reject outright.
+        $origtext =~ s/^\s{2,}//;
+        #French accents substitution
+        $origtext =~ s/à/à/;
+        $origtext =~ s/é/é/;
+
+        return if ($self->{parsingEnded});
+
+        if ($self->{parsingList})
+        {
+
+        }
+        else
+        {
+            # fetching information from page
+            if ($origtext =~ /^Nom VO/)
+            {
+                $self->{curInfo}->{original} = $origtext;
+                $self->{curInfo}->{original} =~ s/Nom VO : //;
+            }
+            if ($self->{insideName})
+            {
+                $self->{curInfo}->{name} = $origtext;
+                $self->{insideName} = 0;
+            }
+            elsif ($self->{insideYear})
+            {
+                $self->{curInfo}->{released} = $origtext;
+                $self->{curInfo}->{released} =~ s/([^0-9])//g;
+                $self->{insideYear} = 0;
+            }
+            elsif ($self->{insideDesigner})
+            {
+                # Append text (and trailing ,) to existing designer field
+                $self->{curInfo}->{designedby} .= $origtext.", ";
+                $self->{insideDesigner} = 0;
+            }
+            elsif ($self->{insideIllustrator})
+            {
+                # Append text (and trailing ,) to existing illustrator field
+                $self->{curInfo}->{illustratedby} .= $origtext.", ";
+                $self->{insideIllustrator} = 0;
+            }
+            elsif ($self->{insidePublishers})
+            {
+                $self->{curInfo}->{publishedby} = $origtext;
+                $self->{insidePublishers} = 0;
+            }
+            elsif ($self->{insidePlayers})
+            {
+                $self->{curInfo}->{players} = $origtext;
+                $self->{insidePlayers} = 0;
+            }
+            elsif ($self->{insideAges})
+            {
+                $self->{curInfo}->{suggestedage} = $origtext;
+                $self->{insideAges} = 0;
+            }
+            elsif ($self->{insidePlayingTime})
+            {
+                $self->{curInfo}->{playingtime} = $origtext;
+                $self->{insidePlayingTime} = 0;
+            }
+            elsif ($self->{insideExpands})
+            {
+                # The flag is reset by end() when the enclosing <b> closes
+                $self->{curInfo}->{expansionfor} .= $origtext;
+
+            }
+            elsif ($self->{insideExpansion})
+            {
+                $self->{curInfo}->{expandedby} .= $self->capWord($origtext).',';
+                $self->{insideExpansion} = 0;
+            }
+            elsif ($self->{insideDescription})
+            {
+                $self->{curInfo}->{description} .= $origtext;
+            }
+            elsif ($self->{insideMechanic})
+            {
+                $self->{curInfo}->{mechanics} .= $self->capWord($origtext).',';
+                $self->{insideMechanic} = 0;
+            }
+            elsif ($self->{insideCategory})
+            {
+                $self->{curInfo}->{category} .= $self->capWord($origtext).',';
+                $self->{insideCategory} = 0;
+            }
+
+
+            # Pre-detection based on text (not tags) for various fields
+            # that have no specific id in tags
+            if ($origtext =~ /^Ann\xe9e/)
+            {
+                $self->{nextIsYear} = 1;
+            }
+            if ($origtext =~ /^Auteur/)
+            {
+                $self->{insideDesignerRow} = 1;
+            }
+            if ($origtext =~ /^Illustrateur/)
+            {
+                $self->{insideIllustratorRow} = 1;
+            }
+            if ($origtext =~ /^Editeur/)
+            {
+                $self->{nextIsPublishers} = 1;
+            }
+            if ($origtext =~ /^Joueurs/)
+            {
+                $self->{nextIsPlayers} = 1;
+            }
+            if ($origtext =~ /^Age/)
+            {
+                $self->{nextIsAges} = 1;
+            }
+            if ($origtext =~ /^Dur/)
+            {
+                $self->{nextIsPlayingTime} = 1;
+            }
+            if ($origtext =~ /^Ceci est une extension pour/)
+            {
+                $self->{insideExpands} = 1;
+            }
+            if ($origtext =~ /canisme\(s\)/)
+            {
+                $self->{insideMechanicRow} = 1;
+            }
+            if ($origtext =~ /Th.{1,8}me\(s\)/)
+            {
+                $self->{insideCategoryRow} = 1;
+            }
+            if ($origtext =~ /^Les extensions/)
+            {
+                $self->{insideExpansionList} = 1;
+            }
+
+        }
+    }
+
+    # Constructor: declares which fields the search results list provides.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            name => 1,
+            released => 1,
+        };
+
+        $self->{isBoardgame} = 0;
+        $self->{curName} = undef;
+        $self->{curUrl} = undef;
+
+        return $self;
+    }
+
+    # Resets the parsing state and cleans the raw HTML before it is parsed.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        $self->{parsingEnded} = 0;
+
+        # NOTE(review): some patterns below look entity-decoded in this copy - confirm upstream
+        $html =~ s/""/'"/g;
+        $html =~ s/""/"'/g;
+        $html =~ s|</a></b><br>|</a><br>|;
+
+        $html =~ s|\x{92}|'|gi;
+        $html =~ s|’|'|gi;
+        $html =~ s|•|*|gi;
+        $html =~ s|…|...|gi;
+        $html =~ s|\x{85}|...|gi;
+        $html =~ s|\x{8C}|OE|gi;
+        $html =~ s|\x{9C}|oe|gi;
+
+        return $html;
+    }
+
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        # Url returned below is for the search page, where $word is replaced by the search
+        return "http://trictrac.net/index.php3?id=jeux&rub=ludotheque&inf=cat&choix=$word";
+    }
+
+    # Turns a (possibly relative) item link into an absolute url.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url if $url =~ /^http:/;
+        if ($url =~ /^\//)
+        {
+            return "http://trictrac.net".$url;
+        }
+        else
+        {
+            return "http://trictrac.net/".$url;
+        }
+    }
+
+    sub getName
+    {
+        return "Tric Trac";
+    }
+
+    sub getAuthor
+    {
+        return 'Florent';
+    }
+
+    sub getLang
+    {
+        return 'FR';
+    }
+
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisFI.pm b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisFI.pm
new file mode 100644
index 0000000..34997a8
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisFI.pm
@@ -0,0 +1,59 @@
+package GCPlugins::GCbooks::GCAdlibrisFI;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCbooksAdlibrisCommon;
+
+{
+    package GCPlugins::GCbooks::GCPluginAdlibrisFI;
+
+    use base qw(GCPlugins::GCbooks::GCbooksAdlibrisPluginsBase);
+    use URI::Escape;
+
+    # Builds the shared Adlibris parser and marks it with the 'fi' language code.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self = bless($class->SUPER::new(), $class);
+
+        $self->{isLang} = 'fi';
+
+        return $self;
+    }
+
+    # Name of the plugin.
+    sub getName
+    {
+        return "Adlibris (FI)";
+    }
+
+    # Language code of the plugin.
+    sub getLang
+    {
+        return 'FI';
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisSV.pm b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisSV.pm
new file mode 100644
index 0000000..f17abdb
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisSV.pm
@@ -0,0 +1,59 @@
+package GCPlugins::GCbooks::GCAdlibrisSV;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCbooksAdlibrisCommon;
+
+{
+    package GCPlugins::GCbooks::GCPluginAdlibrisSV;
+
+    use base qw(GCPlugins::GCbooks::GCbooksAdlibrisPluginsBase);
+    use URI::Escape;
+
+    # Builds the shared Adlibris parser and marks it with the 'se' language code.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self = bless($class->SUPER::new(), $class);
+
+        $self->{isLang} = 'se';
+
+        return $self;
+    }
+
+    # Name of the plugin.
+    sub getName
+    {
+        return "Adlibris (SV)";
+    }
+
+    # Language code of the plugin.
+    sub getLang
+    {
+        return 'SV';
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAlapage.pm b/lib/gcstar/GCPlugins/GCbooks/GCAlapage.pm
new file mode 100644
index 0000000..44f3da0
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAlapage.pm
@@ -0,0 +1,391 @@
+package GCPlugins::GCbooks::GCAlapage;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginAlapage;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+
+ sub start
+ {
+     # Start-tag handler.
+     #   $self    : parser object holding the state flags, itemsList and curInfo
+     #   $tagname : name of the tag being opened
+     #   $attr    : hash reference of the tag attributes
+     # In list mode (parsingList set) it records one entry per search result;
+     # otherwise it raises the is* flags read by the text handler for the item page.
+     # The branches form one ordered chain: only the first matching one runs.
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+
+     if ($self->{parsingList})
+     {
+         if (($tagname eq 'div') && ($attr->{class} eq 'infos_produit'))
+         {
+             # Beginning of a result: its first link carries url and title
+             $self->{isBook} = 1 ;
+             $self->{isUrl} = 1 ;
+         }
+         elsif ($tagname eq 'div')
+         {
+             # Any other div closes the current result
+             $self->{isBook} = 0 ;
+         }
+         elsif (($tagname eq 'a') && ($self->{isUrl}) && ($self->{isBook}))
+         {
+             $self->{itemIdx}++;
+             $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+             $self->{itemsList}[$self->{itemIdx}]->{title} = $attr->{title};
+             $self->{isUrl} = 0 ;
+         }
+         elsif (($tagname eq 'a') && ( index($attr->{href},"mot_auteurs") >= 0) && ($self->{isBook}))
+         {
+             $self->{isAuthor} = 1 ;
+         }
+         elsif (($tagname eq 'br') && ($self->{isBook}))
+         {
+             $self->{isPublisher} = 1 ;
+         }
+     }
+     else
+     {
+         # A flag set to 1 by the text handler moves to 2 on the next opening tag,
+         # whatever that tag is; the value is then taken from the following text.
+         if ($self->{isISBN} eq 1)
+         {
+             $self->{isISBN} = 2 ;
+         }
+         elsif ($self->{isPublication} eq 1)
+         {
+             $self->{isPublication} = 2 ;
+         }
+         elsif ($self->{isFormat} eq 1)
+         {
+             $self->{isFormat} = 2 ;
+         }
+         elsif ($self->{isPage} eq 1)
+         {
+             $self->{isPage} = 2 ;
+         }
+         elsif ($tagname eq 'h2')
+         {
+             $self->{isTitle} = 1 ;
+         }
+         elsif (($tagname eq 'tpfcommentaire') && ($self->{isDescription} eq 1))
+         {
+             $self->{isDescription} = 2 ;
+         }
+         elsif (($tagname eq 'a') && ( index($attr->{href},"mot_auteurs") >= 0))
+         {
+             $self->{isAuthor} = 1 ;
+         }
+         elsif (($tagname eq 'a') && ($attr->{class} eq 'thickbox tooltip') && ($self->{curInfo}->{cover} eq ''))
+         {
+             # The zoom page is fetched to find the path of the big picture:
+             # it is the first quoted string following the "laplusgrande" marker.
+             my $html = $self->loadPage( "http://www.alapage.com" . $attr->{href}, 0, 1);
+             my $marker = '"laplusgrande"';
+             my $found = index($html, $marker);
+             if ( $found >= 0 )
+             {
+                 # Looked up on the whole page, before it is cut below
+                 my $found2 = index($html,"&m=v");
+                 $html = substr($html, $found + length($marker));
+
+                 my @array = split(/"/,$html);
+                 # Without a quoted path after the marker nothing is stored:
+                 # storing the bare site address would give a bogus cover and
+                 # prevent any later attempt (cover would no longer be empty).
+                 if (defined($array[1]) && ($array[1] ne ''))
+                 {
+                     $self->{curInfo}->{cover} = "http://www.alapage.com" . $array[1];
+                     if ( $found2 >= 0 )
+                     {
+                         # Back picture: same address with m=r replaced by m=v
+                         $self->{curInfo}->{backpic} = $self->{curInfo}->{cover};
+                         $self->{curInfo}->{backpic} =~ s|&m=r|&m=v|gi;
+                     }
+                 }
+             }
+         }
+         elsif ($tagname eq 'li')
+         {
+             $self->{isAnalyse} = 1 ;
+         }
+         elsif (($tagname eq 'a') && ( index($attr->{href},"mot_cdu") >= 0))
+         {
+             $self->{isGenre} = 1 ;
+         }
+         elsif (($tagname eq 'a') && ( index($attr->{href},"mot_coll_serie") >= 0))
+         {
+             $self->{isSerie} = 1 ;
+         }
+         elsif (($tagname eq 'a') && ( index($attr->{href},"mot_editeur") >= 0) && ( index($attr->{href},"mot_coll_serie") == -1))
+         {
+             $self->{isPublisher} = 1 ;
+         }
+         elsif (($tagname eq 'a') && ($attr->{name} eq 'comment'))
+         {
+             $self->{isDescription} = 1 ;
+         }
+         elsif (($tagname eq 'div') && ($attr->{class} eq 'blocWithMargin') && ($self->{isDescription}) && ($self->{curInfo}->{description} eq '') )
+         {
+             $self->{isDescription} = 2 ;
+         }
+         elsif (($tagname eq 'a') && ($attr->{name} ne ''))
+         {
+             # Any other named anchor ends the description section
+             $self->{isDescription} = 0 ;
+         }
+         elsif (($tagname eq 'div') && ($attr->{class} eq 'edito FP_commentaire'))
+         {
+             $self->{isDescription} = 1 ;
+         }
+
+     }
+ }
+
+ sub end
+ {
+     # End-tag handler: one level less of nesting for the tag being closed.
+     my $self = shift;
+     my $tagname = shift;
+
+     return $self->{inside}{$tagname}--;
+ }
+
+ # Text handler: receives a chunk of text ($origtext) and stores it in the
+ # field selected by whichever is* flag is currently raised on $self.
+ # - When $self->{parsingList} is true the text is written into the current
+ #   entry of $self->{itemsList} (index $self->{itemIdx}).
+ # - Otherwise it is written into $self->{curInfo}.
+ # In both modes at most one flag is served per call (if/elsif chain), and
+ # the served flag is reset to 0 afterwards.
+ sub text
+ {
+ my ($self, $origtext) = @_;
+
+ if ($self->{parsingList})
+ {
+ if ($self->{isAuthor})
+ {
+ # Strip leading whitespace
+ $origtext =~ s/^\s+//;
+ # Strip trailing whitespace
+ $origtext =~ s/\s+$//;
+
+ # Authors are accumulated as a ', '-separated string.
+ if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '')
+ {
+ $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext;
+ }
+ else
+ {
+ $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', ';
+ $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext;
+ }
+ $self->{isAuthor} = 0 ;
+ }
+ elsif ($self->{isPublisher})
+ {
+ # The text is split on commas: the first piece goes to 'edition',
+ # the last piece to 'publication'.
+ my @array = split(/,/,$origtext);
+
+ $self->{itemsList}[$self->{itemIdx}]->{edition} = $array[0];
+ $self->{itemsList}[$self->{itemIdx}]->{edition} =~ s/^\s+//;
+ $self->{itemsList}[$self->{itemIdx}]->{edition} =~ s/\s+$//;
+
+ # Only when the split did not yield exactly one piece.
+ # NOTE(review): 'ne' is a string comparison on a number, and an empty
+ # text ($#array == -1) also enters this branch - confirm intent.
+ if ($#array ne 0 )
+ {
+ $self->{itemsList}[$self->{itemIdx}]->{publication} = $array[$#array];
+ $self->{itemsList}[$self->{itemIdx}]->{publication} =~ s/^\s+//;
+ $self->{itemsList}[$self->{itemIdx}]->{publication} =~ s/\s+$//;
+ }
+
+ $self->{isPublisher} = 0 ;
+ }
+ }
+ else
+ {
+ # Strip leading whitespace
+ $origtext =~ s/^\s+//;
+ if ($self->{isTitle})
+ {
+ # The language is hard-coded together with the title.
+ $self->{curInfo}->{title} = $origtext;
+ $self->{curInfo}->{language} = 'Français';
+ $self->{isTitle} = 0 ;
+ }
+ elsif ($self->{isAuthor})
+ {
+ # Each author is appended followed by a comma (trailing comma kept).
+ $self->{curInfo}->{authors} .= $origtext;
+ $self->{curInfo}->{authors} .= ",";
+ $self->{isAuthor} = 0 ;
+ }
+ elsif ($self->{isAnalyse})
+ {
+ # The text is a label: raise (to 1) the flag of the field it announces.
+ # The branches below only fire when a flag equals 2; that 1 -> 2
+ # transition does not happen in this sub (presumably in start - confirm).
+ $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i);
+ $self->{isFormat} = 1 if ($origtext =~ m/Dimensions/i);
+ $self->{isPublication} = 1 if ($origtext =~ m/Date de parution/i);
+ $self->{isPage} = 1 if ($origtext =~ m/Nombre de pages/i);
+
+ $self->{isAnalyse} = 0 ;
+ }
+ elsif ($self->{isISBN} eq 2)
+ {
+ $self->{curInfo}->{isbn} = $origtext;
+ $self->{isISBN} = 0 ;
+ }
+ elsif ($self->{isGenre})
+ {
+ # Comma-separated genres: each piece loses its leading whitespace and
+ # is appended followed by a comma.
+ my @array = split(/,/,$origtext);
+ my $element;
+ foreach $element (@array)
+ {
+ $element =~ s/^\s+//;
+ $self->{curInfo}->{genre} .= $element;
+ $self->{curInfo}->{genre} .= ",";
+ }
+ $self->{isGenre} = 0 ;
+ }
+ elsif ($self->{isPublisher})
+ {
+ $self->{curInfo}->{publisher} = $origtext;
+ $self->{isPublisher} = 0 ;
+ }
+ elsif ($self->{isSerie})
+ {
+ $self->{curInfo}->{serie} = $origtext;
+ $self->{isSerie} = 0 ;
+ }
+ elsif ($self->{isFormat} eq 2)
+ {
+ $self->{curInfo}->{format} = $origtext;
+ $self->{isFormat} = 0 ;
+ }
+ elsif ($self->{isPublication} eq 2)
+ {
+ $self->{curInfo}->{publication} = $origtext;
+ $self->{isPublication} = 0 ;
+ }
+ elsif ($self->{isPage} eq 2)
+ {
+ $self->{curInfo}->{pages} = $origtext;
+ $self->{isPage} = 0 ;
+ }
+ elsif ($self->{isDescription} eq 2)
+ {
+ $self->{curInfo}->{description} = $origtext;
+ $self->{isDescription} = 0 ;
+ }
+
+ }
+ }
+
+ sub new
+ {
+     # Constructor: builds the object through the parent class, blesses it
+     # into the requested class, fills the hasField map and clears every
+     # parsing flag used by the handlers of this plugin.
+     my ($proto) = @_;
+     my $class = ref($proto) || $proto;
+     my $self = $class->SUPER::new();
+     bless($self, $class);
+
+     # 1/0 per field name (presumably "shown in the result list" - confirm
+     # against the base class).
+     $self->{hasField} = {
+         title       => 1,
+         authors     => 1,
+         publication => 1,
+         format      => 0,
+         edition     => 1,
+         serie       => 0,
+     };
+
+     # All state flags start lowered.
+     foreach my $flag (qw(isBook isUrl isTitle isAuthor isPublisher isAnalyse
+                          isISBN isGenre isPublication isPage isFormat isSerie
+                          isDescription))
+     {
+         $self->{$flag} = 0;
+     }
+
+     return $self;
+ }
+
+ sub preProcess
+ {
+     # Rewrites the raw HTML before it is parsed and returns the result.
+     # In list mode only <p>/</p> are dropped; in item mode some inline
+     # markup is dropped and the marker tags <tpfanalyse> and
+     # <tpfcommentaire> are inserted after </strong> and </h3>.
+     my ($self, $html) = @_;
+
+     # Paragraph tags are removed last in both modes.
+     my @paragraph_tags = ('<p>', '</p>');
+
+     if (!$self->{parsingList})
+     {
+         # Same removals, in the same order, as the original statement list
+         # (the font pattern appears twice there as well).
+         foreach my $literal ('<font style="font-size:13px;">',
+                              '<font style="font-size:13px;">',
+                              '</font>',
+                              '<strong>')
+         {
+             $html =~ s|\Q$literal\E||gi;
+         }
+         $html =~ s|</strong>|</strong><tpfanalyse>|gi;
+         $html =~ s|</h3>|</h3><tpfcommentaire>|gi;
+     }
+
+     foreach my $literal (@paragraph_tags)
+     {
+         $html =~ s|\Q$literal\E||gi;
+     }
+
+     return $html;
+ }
+
+ sub getSearchUrl
+ {
+     # Builds the search URL for $word: an ISBN query when the search field
+     # is 'isbn', a title query for anything else. $word is appended as is.
+     my ($self, $word) = @_;
+
+     return "http://www.alapage.com/-/Recherche/?type=1&mot_isbn=" . $word
+         if $self->{searchField} eq 'isbn';
+
+     return "http://www.alapage.com/-/Recherche/?type=1&mot_titre=" . $word;
+ }
+
+ sub getItemUrl
+ {
+     # Turns a site-relative item path into an absolute alapage.com URL.
+     my $self     = shift;
+     my $relative = shift;
+
+     return join('', "http://www.alapage.com", $relative);
+ }
+
+ sub getName
+ {
+     # Name string of this plugin.
+     my $name = "Alapage";
+     return $name;
+ }
+
+ sub getAuthor
+ {
+     # Author string of this plugin.
+     my $author = 'TPF';
+     return $author;
+ }
+
+ sub getLang
+ {
+     # Language code string of this plugin.
+     my $lang = 'FR';
+     return $lang;
+ }
+
+ sub getSearchFieldsArray
+ {
+     # Reference to a fresh array naming the fields this plugin can search on.
+     my @fields = ('isbn', 'title');
+     return \@fields;
+ }
+
+ sub getCharset
+ {
+     # Character set name used for this site's pages.
+     my ($self) = @_;
+     my $charset = "ISO-8859-15";
+     return $charset;
+ }
+
+ sub getDefaultPictureSuffix
+ {
+     # File suffix used for pictures when none can be derived.
+     my $suffix = '.jpg';
+     return $suffix;
+ }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazon.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazon.pm
new file mode 100644
index 0000000..7d70ec4
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazon.pm
@@ -0,0 +1,352 @@
+package GCPlugins::GCbooks::GCAmazon;
+
+###################################################
+#
+# Copyright 2005-2009 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+    package GCPlugins::GCbooks::GCPluginAmazon;
+
+    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+    use XML::Simple;
+    use LWP::Simple qw($ua);
+    use Encode;
+    use HTML::Entities;
+    use GCUtils;
+
+    # Parses an XML response ($page) of the product API.
+    # - In list mode ($self->{parsingList}) one entry per <Item> is appended
+    #   to $self->{itemsList}.
+    # - Otherwise the single item is copied into $self->{curInfo}.
+    # Pages starting with an HTML doctype are ignored.
+    sub parse
+    {
+        my ($self, $page) = @_;
+        return if $page =~ /^<!DOCTYPE html/;
+        my $xml;
+        my $xs = XML::Simple->new;
+
+        if ($self->{parsingList})
+        {
+            $xml = $xs->XMLin($page, ForceArray => ['Item','Author'], KeyAttr => []);
+            my $book;
+            foreach $book ( @{ $xml -> {'Items'} -> {'Item'} })
+            {
+                $self->{itemIdx}++;
+                my $url = $self->baseAWSUrl."&Operation=ItemLookup&ResponseGroup=Large,EditorialReview&ItemId=".$book->{ASIN};
+
+                $self->{itemsList}[$self->{itemIdx}]->{url} = $url;
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $book->{ItemAttributes}->{'Title'};
+                for my $author (@{$book->{ItemAttributes}->{'Author'}})
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", "
+                        if $self->{itemsList}[$self->{itemIdx}]->{authors};
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} .= $author;
+                }
+                $self->{itemsList}[$self->{itemIdx}]->{publication} = $book->{ItemAttributes}->{'PublicationDate'};
+                $self->{itemsList}[$self->{itemIdx}]->{format} = $book->{ItemAttributes}->{'Binding'};
+                $self->{itemsList}[$self->{itemIdx}]->{edition} = $book->{ItemAttributes}->{'Edition'};
+            }
+        }
+        else
+        {
+            $xml = $xs->XMLin($page, ForceArray => ['Author','EditorialReview','Language'], KeyAttr => []);
+            $self->{curInfo}->{title} = $xml->{Items}->{Item}->{ItemAttributes}->{Title};
+            for my $author (@{$xml->{Items}->{Item}->{ItemAttributes}->{Author}})
+            {
+                push @{$self->{curInfo}->{authors}}, [$author];
+            }
+
+            my $htmlDescription;
+            if ($xml->{Items}->{Item}->{EditorialReviews}->{EditorialReview}[0]->{Content})
+            {
+                $htmlDescription = $xml->{Items}->{Item}->{EditorialReviews}->{EditorialReview}[0]->{Content};
+            }
+            else
+            {
+                # Unfortunately the api doesn't always return the product description, which is due to
+                # copyright concerns or something. In this case, grab the product html and parse it for
+                # the description.
+                my $response = $ua->get($xml->{Items}->{Item}->{DetailPageURL});
+                my $result;
+                eval {
+                    $result = $response->decoded_content;
+                };
+                # decoded_content may fail or return undef: work on an empty page then
+                $result = '' if !defined $result;
+
+                # Replace some bad characters. TODO - will probably need to extend this for de/jp plugins
+                # NOTE(review): several patterns below equal their replacement
+                # (e.g. ü -> ü); they look like HTML entities that were decoded
+                # when this text was extracted - confirm against upstream.
+                $result =~ s|\x{92}|'|gi;
+                $result =~ s|’|'|gi;
+                $result =~ s|•|*|gi;
+                $result =~ s|œ|oe|gi;
+                $result =~ s|…|...|gi;
+                $result =~ s|\x{85}|...|gi;
+                $result =~ s|\x{8C}|OE|gi;
+                $result =~ s|\x{9C}|oe|gi;
+                $result =~ s|ü|ü|g;
+                $result =~ s|ß|ß|g;
+                $result =~ s|ö|ö|g;
+                $result =~ s|Ü|Ü|g;
+                $result =~ s|ä|ä|g;
+                $result =~ s/„/»/gm;
+                $result =~ s/“/«/gm;
+
+                # Chop out the product description; only trust $1 when the match succeeded
+                if ($result =~ /<div class="productDescriptionWrapper">(.*?)<(\/)*?div/s)
+                {
+                    $htmlDescription = $1;
+                }
+                else
+                {
+                    $htmlDescription = '';
+                }
+
+                # Decode
+                decode_entities($htmlDescription);
+                $htmlDescription = decode('ISO-8859-1', $htmlDescription);
+            }
+
+            # Replace some html with line breaks, strip out the rest
+            $htmlDescription =~ s/<br>/\n/ig;
+            $htmlDescription =~ s/<p>/\n\n/ig;
+            $htmlDescription =~ s/<(.*?)>//gi;
+            $htmlDescription =~ s/^\s*//;
+            $htmlDescription =~ s/\s*$//;
+            $htmlDescription =~ s/ {1,}/ /g;
+            $self->{curInfo}->{description} = $htmlDescription;
+
+            # Attributes are only copied when they are plain scalars (XML::Simple
+            # yields a reference for empty or repeated elements).
+            $self->{curInfo}->{publisher} = $xml->{Items}->{Item}->{ItemAttributes}->{Publisher}
+                if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{Publisher}));
+            $self->{curInfo}->{publication} = $xml->{Items}->{Item}->{ItemAttributes}->{PublicationDate}
+                if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{PublicationDate}));
+            $self->{curInfo}->{language} = $xml->{Items}->{Item}->{ItemAttributes}->{Languages}->{Language}[0]->{Name}
+                if (ref($xml->{Items}->{Item}->{ItemAttributes}->{Languages}->{Language}));
+            $self->{curInfo}->{pages} = $xml->{Items}->{Item}->{ItemAttributes}->{NumberOfPages}
+                if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{NumberOfPages}));
+            $self->{curInfo}->{isbn} = $xml->{Items}->{Item}->{ItemAttributes}->{EAN}
+                if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{EAN}));
+            $self->{curInfo}->{format} = $xml->{Items}->{Item}->{ItemAttributes}->{Binding}
+                if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{Binding}));
+            $self->{curInfo}->{edition} = $xml->{Items}->{Item}->{ItemAttributes}->{Edition}
+                if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{Edition}));
+            $self->{curInfo}->{web} = $xml->{Items}->{Item}->{DetailPageURL};
+
+            # Genre handling via Amazon's browsenodes. Stupidly complicated way of doing things, IMO
+            # BrowseNode is not in ForceArray (ancestors rely on it being a hash),
+            # so a lone node arrives as a hash ref: normalise to a list first.
+            my $browseNodes = $xml->{Items}->{Item}->{BrowseNodes}->{BrowseNode};
+            my @nodes = (ref($browseNodes) eq 'ARRAY') ? @{$browseNodes}
+                      : defined($browseNodes)          ? ($browseNodes)
+                      :                                  ();
+
+            # Loop through all the nodes:
+            for my $node (@nodes)
+            {
+                my $genre = '';
+                my $ancestor = $node;
+
+                # Push the lowest node to the temporary genre list
+                my @genre_list = ($node->{Name});
+
+                # Start stepping up through the current node to find its ancestors
+                while ($ancestor->{Ancestors}->{BrowseNode})
+                {
+                    $ancestor = $ancestor->{Ancestors}->{BrowseNode};
+                    if (($ancestor->{Name} eq 'Specialty Stores') ||
+                        ($ancestor->{Name} eq 'Refinements') ||
+                        ($ancestor->{Name} eq 'Self Service') ||
+                        ($ancestor->{Name} eq 'Specialty Boutique'))
+                    {
+                        # Some categories we definitely want to ignore, since they are full of rubbish tags
+                        $genre = 'ignore';
+                        last;
+                    }
+                    elsif ($ancestor->{Name} =~ m/A\-Z/)
+                    {
+                        # Clear out the current genres from the node, will be full of rubbish like "Authors A-K"
+                        # Keep looping afterwards though, since there could be valid tags below the author
+                        # specific ones
+                        @genre_list = ();
+                    }
+                    elsif ($ancestor->{Name} eq 'Subjects')
+                    {
+                        # Don't go deeper than a Subjects node
+                        last;
+                    }
+                    else
+                    {
+                        # Add the current node to the temporary list, if it's not already included in either list.
+                        # The stored genres are one-element array refs, so compare against their names.
+                        my @known_genres = map { $_->[0] } @{ $self->{curInfo}->{genre} || [] };
+                        push @genre_list, $ancestor->{Name}
+                            if ((!GCUtils::inArrayTest($ancestor->{Name}, @genre_list)) &&
+                                (!GCUtils::inArrayTest($ancestor->{Name}, @known_genres)));
+                    }
+                }
+
+                if ($genre ne 'ignore')
+                {
+                    # Add temporary list to item info
+                    push @{$self->{curInfo}->{genre}}, [$_] foreach @genre_list;
+                }
+            }
+
+            # Let's sort the list for good measure (by genre name; a plain sort
+            # would order the array refs by their memory address)
+            @{$self->{curInfo}->{genre}} = sort { $a->[0] cmp $b->[0] } @{$self->{curInfo}->{genre}};
+
+
+            # Fetch either the big original pic, or just the small thumbnail pic
+            if ($self->{bigPics})
+            {
+                $self->{curInfo}->{cover} = $xml->{Items}->{Item}->{LargeImage}->{URL};
+            }
+            else
+            {
+                $self->{curInfo}->{cover} = $xml->{Items}->{Item}->{SmallImage}->{URL};
+            }
+        }
+    }
+
+    # Constructor: delegates to the parent class and fills the hasField map.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+            publication => 1,
+            format => 1,
+            edition => 1,
+        };
+
+        return $self;
+    }
+
+    # Maps a storefront URL to the matching API lookup URL. URLs that already
+    # point at the API proxy are returned untouched.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+        if (!$url)
+        {
+            # If we're not passed a url, return a hint so that gcstar knows what type
+            # of addresses this plugin handles
+            $url = "http://".$self->baseWWWamazonUrl();
+        }
+        elsif ($url !~ m/sowacs\.appspot\.com/)
+        {
+            # Convert amazon url to aws url. The ASIN is the word after /dp/,
+            # ended by '/', '?', an encoded '?' (%3F) or the end of the url.
+            my ($asinid) = $url =~ m{/dp/(\w+)(?:/|\?|%3F|\z)}i;
+            $asinid = '' if !defined $asinid;
+            $url = $self->baseAWSUrl."&Operation=ItemLookup&ResponseGroup=Large,EditorialReview&ItemId=".$asinid;
+        }
+        return $url;
+    }
+
+    # The XML response is parsed as is.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        return $html;
+    }
+
+    sub decodeEntitiesWanted
+    {
+        return 0;
+    }
+
+    # Builds the API search URL for $word according to $self->{searchField}.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        my $key =
+            ($self->{searchField} eq 'authors') ? 'Author' :
+            ($self->{searchField} eq 'title') ? 'Title' :
+            ($self->{searchField} eq 'isbn') ? 'Keywords' :
+            '';
+        # ISBN search: drop separators but keep the 'X' check digit of ISBN-10
+        $word =~ s/[^0-9Xx]//g
+            if $key eq 'Keywords';
+        return $self->baseAWSUrl."&Operation=ItemSearch&$key=$word&SearchIndex=Books&ResponseGroup=Medium";
+    }
+
+    # Common prefix of every API request, built around baseAmazonUrl().
+    sub baseAWSUrl
+    {
+        my $self = shift;
+        return "http://sowacs.appspot.com/AWS/%5Bamazon\@gcstar.org%5D".$self->baseAmazonUrl()."/onca/xml?Service=AWSECommerceService&AWSAccessKeyId=AKIAJJ5TJWI62A5OOTQQ&AssociateTag=AKIAJJ5TJWI62A5OOTQQ";
+    }
+
+    sub baseAmazonUrl
+    {
+        return "ecs.amazonaws.com";
+    }
+
+    sub baseWWWamazonUrl
+    {
+        return "www.amazon.com";
+    }
+
+    sub changeUrl
+    {
+        my ($self, $url) = @_;
+        # Make sure the url is for the api, not the main product page
+        return $self->getItemUrl($url);
+    }
+
+    sub getName
+    {
+        return "Amazon (US)";
+    }
+
+    sub getAuthor
+    {
+        return 'Zombiepig';
+    }
+
+    sub getLang
+    {
+        return 'EN';
+    }
+
+    sub getCharset
+    {
+        my $self = shift;
+
+        return "UTF-8";
+    }
+
+    sub getSearchCharset
+    {
+        my $self = shift;
+
+        # Need urls to be double character encoded
+        return "utf8";
+    }
+
+    sub convertCharset
+    {
+        my ($self, $value) = @_;
+        return $value;
+    }
+
+    sub getNotConverted
+    {
+        my $self = shift;
+        return [];
+    }
+
+    sub isPreferred
+    {
+        return 1;
+    }
+
+    sub getSearchFieldsArray
+    {
+        return ['title', 'authors', 'isbn'];
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonCA.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonCA.pm
new file mode 100644
index 0000000..eb51a4c
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonCA.pm
@@ -0,0 +1,61 @@
+package GCPlugins::GCbooks::GCAmazonCA;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCAmazon;
+
+{
+ package GCPlugins::GCbooks::GCPluginAmazonCA;
+
+ # Canadian variant of the Amazon plugin: it only overrides the host names,
+ # the name, the language code and the "preferred" flag of its parent class.
+ use base qw(GCPlugins::GCbooks::GCPluginAmazon);
+
+ # Host name of the storefront.
+ sub baseWWWamazonUrl { return "www.amazon.ca"; }
+
+ # Host name of the web-service end point.
+ sub baseAmazonUrl { return "ecs.amazonaws.ca"; }
+
+ # Name string of this plugin.
+ sub getName { return "Amazon (CA)"; }
+
+ # Language code string of this plugin.
+ sub getLang { return 'EN'; }
+
+ # This variant reports itself as not preferred.
+ sub isPreferred { return 0; }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonDE.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonDE.pm
new file mode 100644
index 0000000..0c87502
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonDE.pm
@@ -0,0 +1,56 @@
+package GCPlugins::GCbooks::GCAmazonDE;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCAmazon;
+
+{
+    package GCPlugins::GCbooks::GCPluginAmazonDE;
+
+    # German variant of the Amazon plugin: overrides host names, name and
+    # language code of its parent class.
+    use base qw(GCPlugins::GCbooks::GCPluginAmazon);
+
+    sub baseWWWamazonUrl
+    {
+        return "www.amazon.de";
+    }
+
+    sub baseAmazonUrl
+    {
+        return "ecs.amazonaws.de";
+    }
+
+    sub getName
+    {
+        return "Amazon (DE)";
+    }
+
+    sub getLang
+    {
+        return 'DE';
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonFR.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonFR.pm
new file mode 100644
index 0000000..d87af48
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonFR.pm
@@ -0,0 +1,57 @@
+package GCPlugins::GCbooks::GCAmazonFR;
+
+###################################################
+#
+# Copyright 2005-2009 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCAmazon;
+
+{
+    package GCPlugins::GCbooks::GCPluginAmazonFR;
+
+    # French variant of the Amazon plugin: overrides host names, name and
+    # language code of its parent class.
+    use base qw(GCPlugins::GCbooks::GCPluginAmazon);
+
+    sub baseWWWamazonUrl
+    {
+        return "www.amazon.fr";
+    }
+
+    sub baseAmazonUrl
+    {
+        return "ecs.amazonaws.fr";
+    }
+
+    sub getName
+    {
+        return "Amazon (FR)";
+    }
+
+    sub getLang
+    {
+        return 'FR';
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonUK.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonUK.pm
new file mode 100644
index 0000000..e39a2de
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonUK.pm
@@ -0,0 +1,61 @@
+package GCPlugins::GCbooks::GCAmazonUK;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCAmazon;
+
+{
+    package GCPlugins::GCbooks::GCPluginAmazonUK;
+
+    # UK variant of the Amazon plugin: overrides host names, name, language
+    # code and the "preferred" flag of its parent class.
+    use base qw(GCPlugins::GCbooks::GCPluginAmazon);
+
+    sub baseWWWamazonUrl
+    {
+        return "www.amazon.co.uk";
+    }
+
+    sub baseAmazonUrl
+    {
+        return "ecs.amazonaws.co.uk";
+    }
+
+    sub getName
+    {
+        return "Amazon (UK)";
+    }
+
+    sub getLang
+    {
+        return 'EN';
+    }
+
+    sub isPreferred
+    {
+        return 0;
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBDGest.pm b/lib/gcstar/GCPlugins/GCbooks/GCBDGest.pm
new file mode 100644
index 0000000..36074aa
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCBDGest.pm
@@ -0,0 +1,477 @@
+package GCPlugins::GCbooks::GCBDGest;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginBDGest;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+ # array storing, for each search result index, the BDGest-specific identifier (the part of the result link after '#')
+ my @tableau;
+ # Start-tag handler: receives the tag name and its attribute hash ($attr)
+ # and raises the is* flags that tell the text handler where the next text
+ # chunk belongs.
+ # - List mode ($self->{parsingList}): creates one itemsList entry per
+ #   "serie-" link and records its anchor in the file-scoped @tableau.
+ # - Item mode: looks for the anchor recorded for $self->{wantedIdx} and
+ #   promotes flags from 1 to 2 when the cell or tag holding the value opens.
+ sub start
+ {
+ my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+ $self->{inside}->{$tagname}++;
+
+ # parsing a list of results
+ if ($self->{parsingList})
+ {
+ # Every table row opens a potential new book.
+ if (($tagname eq 'tr'))
+ {
+ $self->{isBook} = 1 ;
+ $self->{isUrl} = 1 ;
+ }
+ elsif (($tagname eq 'a') && ($self->{isUrl}) && ($self->{isBook}) && (index($attr->{href},"serie-") >= 0))
+ {
+ $self->{itemIdx}++;
+ $self->{isFound} = 1 ;
+ # Keep what follows the '#' of the link.
+ $tableau[$self->{itemIdx}] = substr($attr->{href},index( $attr->{href},"#")+1);
+ # rework the url so that all pages of the series are returned:
+ # "__10000" is inserted in front of the first "." of the href
+ my $urlRecherche = substr($attr->{href},0,index($attr->{href},"."))."__10000".substr($attr->{href},index($attr->{href},"."));
+ $self->{itemsList}[$self->{itemIdx}]->{url} = $urlRecherche;
+ $self->{isSerie} = 1 ;
+ $self->{isUrl} = 0 ;
+ }
+ elsif (($tagname eq 'a') && ($attr->{name} eq 'TitreAlbum')&& ($self->{isBook}) && ($attr->{title} ne ''))
+ {
+ # The title comes from the link's title attribute, not from text.
+ $self->{isTitle} = 1 ;
+ $self->{itemsList}[$self->{itemIdx}]->{title} = $attr->{title};
+ }
+ elsif (($tagname eq 'td') && $self->{isTitle} eq 1)
+ {
+ # The cell opened after the title is read as the publisher.
+ $self->{isPublisher} = 1 ;
+ $self->{isTitle} = 0;
+ }
+ elsif (($tagname eq 'td') && $self->{isPublisher} eq 2)
+ {
+ # The cell opened after the publisher is read as the publication.
+ $self->{isPublication} = 1 ;
+ $self->{isPublisher} = 0;
+ }
+ }
+ else # parsing a single item
+ {
+ # Anchor of the wanted album: its details follow.
+ if (($tagname eq 'a') && ($attr->{name} eq $tableau[$self->{wantedIdx}]))
+ {
+ $self->{isTitle} = 1 ;
+ $self->{isCover} = 1;
+ $self->{isBook} = 1 ;
+ }
+ elsif ($tagname eq 'html')
+ {
+ $self->{isCover} = 0 ;
+ }
+ elsif ($tagname eq 'head')
+ {
+ $self->{isCover} = 0 ;
+ }
+ # Any other named anchor ends the section of the wanted album.
+ elsif (($tagname eq 'a') && ($attr->{name} ne $tableau[$self->{wantedIdx}]) && ($attr->{name} ne ''))
+ {
+ $self->{isBook} = 0 ;
+ }
+ elsif (($tagname eq 'font') && ($attr->{color} eq '#294A6B') && ($attr->{style} eq 'font-family:Trebuchet MS; FONT-SIZE: 11pt;') && ($self->{isTitle} eq 1))
+ {
+ $self->{isTitle} = 2 ;
+ }
+# elsif (($tagname eq 'a') && ($self->{isCover} eq 0) && (index($attr->{href},"Couvertures") >= 0))
+ # While isCover is 0, every remaining link overwrites the cover URL, so
+ # the last one seen before the wanted anchor is kept.
+ # NOTE(review): the stricter "Couvertures" test is commented out above;
+ # when the href has no "Couvertures/" index() returns -1 and substr keeps
+ # only the last character - confirm this is intended.
+ elsif (($tagname eq 'a') && ($self->{isCover} eq 0))
+ {
+ my $urlimage = $attr->{href};
+ $urlimage =~ s/\'//g;
+ $urlimage =~ s/\)//g;
+ $urlimage = substr($urlimage,index($urlimage,"Couvertures/"));
+ $self->{curInfo}->{cover} = 'http://www.bedetheque.com/'.$urlimage;
+ }
+ elsif (($tagname eq 'a') && ($self->{isBook}) && (index($attr->{href},"auteur") >= 0))
+ {
+ $self->{isAuthor} = 1 ;
+ }
+ # Below: a flag at 1 means its label was seen by the text handler; the
+ # next opening cell (or <i> for the description) holds the value.
+ elsif (($tagname eq 'td') && ($self->{isPublisher} eq 1))
+ {
+ $self->{isPublisher} = 2 ;
+ }
+ elsif (($tagname eq 'td') && $self->{isPublication} eq 1)
+ {
+ $self->{isPublication} = 2 ;
+ }
+ elsif (($tagname eq 'td') && $self->{isEdition} eq 1)
+ {
+ $self->{isEdition} = 2 ;
+ }
+ elsif (($tagname eq 'td') && $self->{isFormatPublication} eq 1)
+ {
+ $self->{isFormatPublication} = 2 ;
+ }
+ elsif (($tagname eq 'td') && $self->{isISBN} eq 1)
+ {
+ $self->{isISBN} = 2 ;
+ }
+ elsif (($tagname eq 'td') && $self->{isPage} eq 1)
+ {
+ $self->{isPage} = 2 ;
+ }
+ elsif (($tagname eq 'i') && $self->{isDescription} eq 1)
+ {
+ $self->{isDescription} = 2 ;
+ }
+ }
+ }
+
+ sub end
+ {
+     # End-tag handler: lowers the isFound flag and decrements the counter
+     # of the element being closed in $self->{inside}.
+     my $self    = shift;
+     my $tagname = shift;
+
+     $self->{isFound} = 0;
+     $self->{inside}->{$tagname}--;
+
+ }
+
+ sub text
+ {
+     # Text handler: stores the text chunk $origtext in the field selected
+     # by the flags raised by the start-tag handler.
+     # - List mode ($self->{parsingList}): fills serie / edition /
+     #   publication of the current itemsList entry.
+     # - Item mode: fills $self->{curInfo}; a flag at 2 means "this text is
+     #   the value", after which it is set to 3 so that the label is not
+     #   armed again. While isBook is 1, label texts such as "Editeur :"
+     #   arm the matching flag (set it to 1).
+     my ($self, $origtext) = @_;
+
+     if ($self->{parsingList})
+     {
+         if ($self->{isSerie})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{serie} = $origtext;
+             $self->{isSerie} = 0 ;
+         }
+         if ($self->{isPublisher} eq 1)
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+             $self->{isPublisher} = 2 ;
+         }
+         if ($self->{isPublication} eq 1)
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{publication} = $origtext;
+             $self->{isPublication} = 0 ;
+         }
+     }
+     else
+     {
+         # Strip leading whitespace
+         $origtext =~ s/^\s+//;
+         # Reset the field when it is supposed to be empty (marker added by preProcess)
+         $origtext =~ s/#TPFCHAMPSVIDE#//;
+         if ($self->{isTitle} eq 2)
+         {
+             # If the title contains INT (collected edition, hence no title of
+             # its own) the series name is prepended to the title.
+             if($origtext =~ /INT/i)
+             {
+                 # Remove the INT/int prefix and the dot. The dot is escaped:
+                 # a bare "." would delete whatever character comes first.
+                 $origtext =~ s/INT//i;
+                 $origtext =~ s/\.//;
+                 $self->{curInfo}->{title} = $self->{itemsList}[$self->{wantedIdx}]->{serie}." ".$origtext;
+             }
+             else
+             {
+                 # A number in front of the title is turned into a volume
+                 # ("Tome") and joined with the series name. The split relies
+                 # on the position of the first ".", so it is only attempted
+                 # when a "." is actually present.
+                 if(($origtext =~ /[0-9]./) && (index($origtext,".") >= 0))
+                 {
+                     my $tome = substr($origtext,0,index($origtext,"."));
+                     $tome =~ s/^\s+//;
+                     my $titre = substr($origtext,index($origtext,".")+1);
+                     $titre =~ s/^\s+//;
+                     $self->{curInfo}->{title} = $self->{itemsList}[$self->{wantedIdx}]->{serie}." Tome ".$tome ." : ".$titre;
+                 }
+                 else
+                 {
+                     $self->{curInfo}->{title} = $origtext;
+                 }
+             }
+             $self->{curInfo}->{web} = "http://www.bedetheque.com/".$self->{itemsList}[$self->{wantedIdx}]->{url};
+             $self->{curInfo}->{serie} = $self->{itemsList}[$self->{wantedIdx}]->{serie};
+             $self->{curInfo}->{language} = 'Français';
+             $self->{isTitle} = 0 ;
+         }
+         elsif (($self->{isAuthor}) && ($self->{nbAuthor} < 3))
+         {
+             # Remove the comma between last name and first name
+             $origtext =~ s/,//g;
+             # The black-and-white marker inserted by preProcess is not an author
+             if (($origtext ne '') && ($origtext ne '#TPF NOIR ET BLANC TPF#'))
+             {
+                 $self->{curInfo}->{authors} .= $origtext;
+                 $self->{curInfo}->{authors} .= ",";
+             }
+             $self->{isAuthor} = 0;
+             $self->{nbAuthor} = $self->{nbAuthor} + 1;
+         }
+         elsif ($self->{isPublisher} eq 2)
+         {
+             $self->{curInfo}->{publisher} = $origtext;
+             $self->{isPublisher} = 3 ;
+         }
+         elsif ($self->{isPublication} eq 2)
+         {
+             $self->{curInfo}->{publication} = $origtext;
+             $self->{isPublication} = 3 ;
+         }
+         elsif ($self->{isEdition} eq 2)
+         {
+             $self->{curInfo}->{edition} = $origtext;
+             $self->{isEdition} = 3 ;
+         }
+         elsif ($self->{isFormatPublication} eq 2 )
+         {
+             $self->{curInfo}->{format} = $origtext;
+             $self->{isFormatPublication} = 3 ;
+         }
+         elsif ($self->{isISBN} eq 2)
+         {
+             $origtext =~ s/978\-//;
+             $self->{curInfo}->{isbn} = $origtext;
+             $self->{isISBN} = 3 ;
+         }
+         elsif ($self->{isPage} eq 2)
+         {
+             $self->{curInfo}->{pages} = $origtext;
+             $self->{isPage} = 3 ;
+         }
+         elsif ($self->{isDescription} eq 2)
+         {
+             if($origtext ne '')
+             {
+                 if($self->{curInfo}->{description} ne '')
+                 {
+                     $self->{curInfo}->{description} .= "\n\n";
+                 }
+                 $self->{curInfo}->{description} .= "Info sur cette edition : ".$origtext;
+             }
+             $self->{isDescription} = 3 ;
+         }
+         elsif ($self->{isBook} eq 1)
+         {
+             # Label texts: arm the matching flag unless its value was already read (3)
+             if (($origtext eq "Editeur :") && ($self->{isPublisher} ne 3))
+             {
+                 $self->{isPublisher} = 1;
+             }
+             elsif (($origtext eq "Dépot légal :") && ($self->{isPublication} ne 3))
+             {
+                 $self->{isPublication} = 1;
+             }
+             elsif (($origtext eq "Collection :") && ($self->{isEdition} ne 3))
+             {
+                 $self->{isEdition} = 1;
+             }
+             elsif (($origtext eq "Taille :") && ($self->{isFormatPublication} ne 3))
+             {
+                 $self->{isFormatPublication} = 1;
+             }
+             elsif (($origtext eq "ISBN :") && ($self->{isISBN} ne 3))
+             {
+                 $self->{isISBN} = 1;
+             }
+             elsif (($origtext eq "Planches :") && ($self->{isPage} ne 3))
+             {
+                 $self->{isPage} = 1;
+             }
+             elsif (($origtext eq "Info édition : ") && ($self->{isDescription} ne 3))
+             {
+                 $self->{isDescription} = 1;
+             }
+         }
+     }
+ }
+
+ sub new
+ {
+     # Constructor. The class is instantiated only once, when the
+     # application starts; per-search state is reset again in preProcess.
+     my ($proto) = @_;
+     my $class = ref($proto) || $proto;
+     my $self = $class->SUPER::new();
+     bless($self, $class);
+
+     # 1/0 per field name (presumably "shown in the result list" - confirm
+     # against the base class).
+     $self->{hasField} = {
+         serie       => 1,
+         title       => 1,
+         publication => 1,
+         format      => 0,
+         edition     => 1,
+     };
+
+     # Counters and state flags all start at 0.
+     foreach my $key (qw(idPage nbAuthor isFound isSerie isBook isUrl isTitle
+                         isAuthor isFormatPublication isPublisher isISBN
+                         isPublication isFormat isPage isDescription isCover))
+     {
+         $self->{$key} = 0;
+     }
+
+     return $self;
+ }
+
+ sub preProcess
+ {
+     # Resets the parsing state, then rewrites the raw HTML of an item page
+     # before it is parsed. List pages are returned unchanged.
+     my ($self, $html) = @_;
+
+     # Reset of the variables (the object stays in memory between two searches)
+     foreach my $key (qw(idPage nbAuthor isFound isSerie isEdition isBook isUrl
+                         isTitle isAuthor isFormatPublication isPublisher isISBN
+                         isPublication isFormat isPage isDescription isCover))
+     {
+         $self->{$key} = 0;
+     }
+
+     return $html if $self->{parsingList};
+
+     $html =~ s|<u>||gi;
+     $html =~ s|<li>|\n* |gi;
+     $html =~ s|<br>|\n|gi;
+     $html =~ s|<br />|\n|gi;
+     $html =~ s|<b>||gi;
+     $html =~ s|</b>||gi;
+#     $html =~ s|<i>||gi;
+#     $html =~ s|</i>||gi;
+     $html =~ s|<p>|\n|gi;
+     $html =~ s|</p>||gi;
+     $html =~ s|\x{92}|'|g;
+     $html =~ s|’|'|gi;
+     $html =~ s|•|*|gi;
+     $html =~ s|…|...|gi;
+     $html =~ s|\x{8C}|OE|gi;
+     $html =~ s|\x{9C}|oe|gi;
+
+     # An empty field can cause trouble: mark the empty cell so that the
+     # text handler still receives a text chunk for it
+     $html =~ s|<td><font color="#5C7994"></font></td>|<td><font color="#5C7994"></font>#TPFCHAMPSVIDE#</td>|gi;
+
+     $html =~ s|<font color="#D19159">||gi;
+     $html =~ s|</font>||gi;
+     # This is not a colourist, so it must not be added to the list of authors
+     $html =~ s|<N&B>|#TPF NOIR ET BLANC TPF#|gi;
+
+     return $html;
+ }
+
+ sub getSearchUrl
+ {
+     # Builds the search URL for $word.
+     # A 10 or 13 character word containing a digit is treated as an ISBN:
+     # - with hyphens it is sent as is (minus a leading "978-");
+     # - without hyphens it is reduced to ISBN-10, the check digit is
+     #   recomputed and a hyphenated pattern with %25 (an encoded "%") in
+     #   the middle is sent.
+     # Anything else is searched as a series name.
+     my ($self, $word) = @_;
+
+     my $length = length($word);
+     if (($length == 13 || $length == 10) && ($word =~ /[0-9]/))
+     {
+         if ($word =~ /\-/)
+         {
+             # Already hyphenated: only drop the 978 prefix when it leads
+             $word =~ s/^978\-//;
+             return "http://www.bedetheque.com/index.php?R=1&RechISBN=". $word;
+         }
+         else
+         {
+             # Drop the leading 978 of a 13-digit code only; an unanchored
+             # removal would also cut a "978" found inside an ISBN-10.
+             $word =~ s/^978// if $length == 13;
+
+             # Recompute the ISBN-10 check digit: weights 10 down to 2 on the
+             # first nine digits
+             my $total = 0;
+             foreach my $position (0 .. 8)
+             {
+                 $total = $total + (substr($word,$position,1) * (10 - $position));
+             }
+             # The final modulo maps 11 (sum divisible by 11) to check digit 0
+             $total = (11 - ($total % 11)) % 11;
+
+             if ($total == 10)
+             {
+                 $total = 'X';
+             }
+
+             $word = substr($word,0,1)."-".substr($word,1,2)."%25-%25".substr($word,7,2)."-".$total;
+             return "http://www.bedetheque.com/index.php?R=1&RechISBN=". $word;
+         }
+     }
+     else
+     {
+         return "http://www.bedetheque.com/index.php?R=1&RechSerie=". $word;
+     }
+ }
+
+ sub getItemUrl
+ {
+     # Turns a site-relative item path into an absolute bedetheque.com URL.
+     my $self     = shift;
+     my $relative = shift;
+
+     return join('', "http://www.bedetheque.com/", $relative);
+ }
+
+ sub getName
+ {
+     # Name string of this plugin.
+     my $name = "BDGest";
+     return $name;
+ }
+
+ # Character set name reported by this plugin.
+ sub getCharset
+ {
+     my ($self) = @_;
+     my $charset = "ISO-8859-1";
+
+     return $charset;
+ }
+
+ # Author of this plugin.
+ sub getAuthor
+ {
+     my $pluginAuthor = 'Rataflo';
+     return $pluginAuthor;
+ }
+
+ # Language code reported by this plugin.
+ sub getLang
+ {
+     my $languageCode = 'FR';
+     return $languageCode;
+ }
+
+ # Search fields reported by this plugin, as a reference to a fresh
+ # array on every call.
+ sub getSearchFieldsArray
+ {
+     my @searchFields = ('isbn', 'title');
+     return \@searchFields;
+ }
+
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBibliotekaNarodowa.pm b/lib/gcstar/GCPlugins/GCbooks/GCBibliotekaNarodowa.pm new file mode 100644 index 0000000..927be0b --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCBibliotekaNarodowa.pm @@ -0,0 +1,374 @@ +package GCPlugins::GCbooks::GCbooksBibliotekaNarodowa; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +my $searchISBN = ""; + +{ + package GCPlugins::GCbooks::GCPluginBibliotekaNarodowa; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq 'frameset') #od razu mamy wynik + { + $self->{isBook} = 7; + $self->{itemIdx}++; + } + if (($tagname eq 'frame') && ($attr->{name} eq 'bib_frame') && $self->{isBook} == 7) #od razu mamy wynik + { + $self->{isUrl} = 1; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://alpha.bn.org.pl".$attr->{src}; + $self->{isUrl} = 0; + $self->{isBook} = 0; + } + + if (($tagname eq 'tr') && ($attr->{class} eq 
'browseEntry')) + { + $self->{isBook} = 1; + $self->{itemIdx}++; + } + if (($tagname eq 'td') && ($attr->{class} eq 'browseEntryData') && ($self->{isBook} == 1)) + { + $self->{isAuthor} = 2; + } + if (($tagname eq 'a') && ($self->{isBook} == 1) && ($self->{isAuthor} > 0)) + { + $self->{isUrl} = 1; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://alpha.bn.org.pl".$attr->{href}; + $self->{itemsList}[$self->{itemIdx}]->{url} =~ s|frameset|bibframe|; + $self->{isUrl} = 0; + $self->{isAuthor} = 0; + $self->{isTitle} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'browseEntryYear') && ($self->{isBook} == 1)) + { + $self->{isPublication} = 1; + } + } + else + { + if (($tagname eq 'div') && ($attr->{id} eq 'wrgTIAUTR')) + { + $self->{isTitle} = 1; + $self->{isAuthor} = 1; + $self->{isTranslator} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgISBN')) + { + $self->{isISBN} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPAGES')) + { + $self->{isPage} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPUBPD')) + { + $self->{isPublisher} = 1; + $self->{isPublication} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgSERIA')) + { + $self->{isSerie} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgEDITI')) + { + $self->{isEdition} = 1; + } + } + } + + + sub end + { + my ($self, $tagname) = @_; + + if ($self->{parsingList}) + { + if (($tagname eq 'tr') && ($self->{isBook} == 1)) + { + $self->{isBook} = 0; + } + } + + $self->{isFound} = 0; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + $origtext =~ s|^\s*||m; + $origtext =~ s|\s*$||m; + + if ($self->{parsingList}) + { + if ($self->{isBook} == 1) + { + if ($self->{isTitle} == 1) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + } + if ($self->{isAuthor} > 0) + { + $origtext =~ s|\s*\/\s*(.*)\s*;|$1|; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $1; + $self->{isAuthor} = 1; + } + if 
($self->{isPublication} == 1) + { + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + $self->{isPublication} = 0; + } + } + + } + else + { + if (($self->{isTitle} == 1) && ($self->{isAuthor} == 1) && ($self->{isTranslator} == 1)) + { + my ($ti, $au, $tr, $bubu); + $origtext =~ m|.*(\.){1}$|; + $bubu = $1; + if ($bubu eq '.') + { + $origtext =~ s|(.*)\.$|$1|; + } + $origtext =~ m/([^\/]+)(\/\s+[^;]*)?(;\s*.*(tł|przeł|przekł)\..*)?$/; +# $origtext =~ m|([^/]+)(/\s[^;]+)?(;.*)?$|; + $ti = $1; + $au = $2; + $tr = $3; + $ti =~ s|([^:]*):?.*$|$1|; + $ti =~ s|\s*$||; + $self->{curInfo}->{title} = $ti; + $self->{isTitle} = 0; + $au =~ s|^(.*)il\..*$|$1|; + $au =~ s/(\/|tekst)//g; + $au =~ s| i |,|g; + $au =~ s|, |,|g; + $au =~ s|^\s*||; + $au =~ s|\s*$||; + $self->{curInfo}->{authors} = $au; + $self->{isAuthor} = 0; + $tr =~ s|[\[\]]||g; + $tr =~ s/;\s*.*(tł|przeł|przekł)\. (\[.*\] )?(.*)\.?$/$3/; + $tr =~ s|(z \w+\. )?(.*)|$2|; + $self->{curInfo}->{translator} = $tr; + $self->{isTranslator} = 0; + } + if (($self->{isPublisher} == 1) && ($self->{isPublication} == 1)) + { + my ($pu, $pd); + $origtext =~ m|(.*)\s:\s(.*),\s(.*)|; + $pu = $2; + $pd = $3; + $pu =~ s|([^"]*")?([^"]*)"?|$2|; + $pu =~ s|[\[\]]||g; + $self->{curInfo}->{publisher} = $pu; + $self->{isPublisher} = 0; + $pd =~ s|[^\d]||g; + $self->{curInfo}->{publication} = $pd; + $self->{isPublication} = 0; + } + if ($self->{isISBN} eq '1') + { + my ($pom1, $pom2); + if ($self->{searchField} eq 'isbn') + { + $pom1 = $self->{searchISBN}; + $pom2 = $origtext; + $pom2 =~ s|[^\dX]||g; + $pom1 =~ s|-||g; + $pom2 =~ s|-||g; + if ($pom1 eq $pom2) + { + $self->{curInfo}->{isbn} = $self->{searchISBN}; + } + else + { + $self->{curInfo}->{isbn} = $origtext; + } + } + else + { + $self->{curInfo}->{isbn} = $origtext; + } + $self->{isISBN} = 0; + } + if ($self->{isPage} eq '1') + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + if ($self->{isEdition} eq '1') + { + $origtext =~ s|[\[\]]||g; + 
$origtext =~ s|(.*)\.{1}$|$1|; + $self->{curInfo}->{edition} = $origtext; + $self->{isEdition} = 0; + } + if ($self->{isSerie} eq '1') + { + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 1, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditor_Publication_Format_Lang} = 0 ; + $self->{isAnalyse} = 0; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isEdition} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + $self->{isTranslator} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + + if ($self->{parsingList}) + { + $html =~ s|<b>(.*?)</b>|$1|gms; + $html =~ s|<td class="browseEntryData">\s*<a(.*)/a>\s*(.*)\s*|<td class="browseEntryData">$2<a$1/a>|gm; + } + else + { + $html =~ s|</?strong>||gi; + $html =~ s|</?br>||gi; + $html =~ s|</?i>||gi; + + $html =~ s|<td.*>ISBN</td>\s*<.*>\s*(\w*)</td>|<div id="wrgISBN">$1</div>|m; + $html =~ s|<td.*>Seria</td>\s*<.*>\s*(.*)\s*</td>|<div id="wrgSERIA">$1</div>|m; + $html =~ s|<div id="wrgSERIA">(.*)( / [^<]*)</div>|<div id="wrgSERIA">$1</div>|; + $html =~ s|<td.*>Opis fiz</td>\s*<.*>\s*(\d*)\D.*</td>|<div id="wrgPAGES">$1</div>|m; + $html =~ s|<td.*>TytuŁ</td>\s*<.*>\s*(.*)\s*</td>|<div id="wrgTIAUTR">$1</div>|m; + $html =~ s|<td.*>Adres wyd</td>\s*<.*>\s*(.*)\s*</td>|<div id="wrgPUBPD">$1</div>|m; + $html =~ s|<td.*>Wydanie</td>\s*<.*>\s*(.*)\s*</td>|<div id="wrgEDITI">$1</div>|m; + } + + return $html; + } + + sub getSearchUrl + { + my 
($self, $word) = @_; + my $bubu; + if ($self->{searchField} eq 'isbn') + { + $bubu = "i"; + $self->{searchISBN} = $word; + } + else + { + $bubu = "t"; + $self->{searchISBN} = ""; + } + return "http://alpha.bn.org.pl/search*pol/".$bubu."?SEARCH=".$word; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url; + return 'http://alpha.bn.org.pl' + } + + sub getName + { + return "Biblioteka Narodowa"; + } + + sub getCharset + { + my $self = shift; + #return "UTF-8"; + return "ISO-8859-2"; + } + + sub getAuthor + { + return 'WG'; + } + + sub getLang + { + return 'PL'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBokkilden.pm b/lib/gcstar/GCPlugins/GCbooks/GCBokkilden.pm new file mode 100644 index 0000000..d32c1f4 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCBokkilden.pm @@ -0,0 +1,295 @@ +package GCPlugins::GCbooks::GCBokkilden; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginBokkilden; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + if ($self->{itemIdx} < 0) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + return; + } + + if ($self->{parsingList}) + { + if (($tagname eq 'h1') && ($attr->{class} eq 'normal')) + { + $self->{isBook} = 1; + $self->{itemIdx}++; + } + elsif ($self->{isBook}) + { + if ($tagname eq 'a') + { + if (($attr->{href} =~ /produkt\.do/) + && (!$self->{itemsList}[$self->{itemIdx}]->{title})) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{isTitle} = 1; + } + elsif ($attr->{href} =~ /sok\.do\?enkeltsok/) + { + $self->{isAuthor} = 1; + } + } + } + } + else + { + if ($tagname eq 'table') + { + $self->{isBook} = 1 + if ($attr->{class} eq 'bokfaktatabell'); + } + elsif ($tagname eq 'div') + { + $self->{isCover} = 1 if ($attr->{class} eq 'img-ilus') + && ($attr->{style} eq 'width:120px;'); + $self->{is} = 'description' if $attr->{id} eq 'omtale-hidden'; + } + elsif ($tagname eq 'img') + { + if ($self->{isCover}) + { + $self->{curInfo}->{cover} = 'http://www.bokkilden.no/SamboWeb/' + . 
$attr->{src}; + $self->{isCover} = 0; + } + } + elsif ($tagname eq 'h1') + { + $self->{h1Style} = $attr->{style}; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + if ($self->{inside}->{title}) + { + $self->{parsingEnded} = 1 if $origtext !~ /S..?k p..?/; + } + + elsif ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + } + elsif ($self->{isAuthor}) + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ',' + if $self->{itemsList}[$self->{itemIdx}]->{authors}; + $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext; + $self->{isAuthor} = 0; + } + elsif ($self->{isBook}) + { + if ($origtext =~ / \| /) + { + $origtext =~ /(\d{4})/; + $self->{itemsList}[$self->{itemIdx}]->{publication} = $1; + $self->{isBook} = 0; + } + } + } + else + { + if ($self->{is}) + { + $origtext =~ s/^\s*//; + $self->{curInfo}->{$self->{is}} = $origtext; + if ($self->{is} eq 'genre') + { + $self->{curInfo}->{genre} =~ s/;\s*/,/g; + } + elsif ($self->{is} eq 'pages') + { + $self->{curInfo}->{pages} =~ s/[^0-9]//g; + } + $self->{is} = ''; + } + elsif ($self->{inside}->{title}) + { + $self->{tmpTitle} = $origtext; + } + elsif ($self->{inside}->{h1}) + { + if (!$self->{curInfo}->{title}) + { + if ($self->{h1Style}) + { + $self->{tmpTitle} =~ /\s*(.*?) av (.*?) 
»/gim; + $self->{curInfo}->{title} = $1; + $self->{curInfo}->{authors} = $2; + } + else + { + $self->{curInfo}->{title} = $origtext; + } + } + } + elsif ($self->{inside}->{author}) + { + $self->{curInfo}->{authors} .= ',' + if $self->{curInfo}->{authors}; + $self->{curInfo}->{authors} .= $origtext; + } + if ($self->{inside}->{translator}) + { + $self->{curInfo}->{translator} .= ', ' + if $self->{curInfo}->{translator}; + $self->{curInfo}->{translator} .= $origtext; + } + elsif (($self->{isBook}) && $self->{inside}->{b}) + { + $self->{is} = + ($origtext eq 'Utgitt: ') ? 'publication' + : ($origtext eq 'Forlag: ') ? 'publisher' + : ($origtext eq 'Innb.: ') ? 'format' + : ($origtext =~ /Spr..?k:/) ? 'language' + : ($origtext eq 'Sider: ') ? 'pages' + : ($origtext eq 'ISBN: ') ? 'isbn' + : ($origtext eq 'Utgave: ') ? 'edition' + : ($origtext eq 'Genre:') ? 'genre' + : ''; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 0, + edition => 0, + }; + + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{isBook} = 0; + if ($self->{parsingList}) + { + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + } + else + { + $self->{is} = ''; + $self->{isCover} = 0; + $html =~ s|<a href="emneliste\.do\?emnekode=[.0-9]*">(.*?)</a>|$1|gim; + $html =~ s|<a href="sok\.do\?enkeltsok=[^"]*">([^<]*)</a>|<author>$1</author>|gim; + #" + $html =~ s|<a href="sok\.do\?.*?rolle1=Oversetter">(.*?)</a>|<translator>$1</translator>|gim; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + return "http://www.bokkilden.no/SamboWeb/sok.do?rom=MP&enkeltsok=$word&innsnevre=ja"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return "http://www.bokkilden.no/SamboWeb/$url" + if $url !~ m|http://www.bokkilden.no/|; + return $url; + } + + sub 
getCharset + { + my $self = shift; + + return 'UTF-8'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + + sub getName + { + return 'Bokkilden'; + } + + sub getLang + { + return 'NO'; + } + + sub getAuthor + { + return 'Tian'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBol.pm b/lib/gcstar/GCPlugins/GCbooks/GCBol.pm new file mode 100644 index 0000000..6e882b1 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCBol.pm @@ -0,0 +1,485 @@ +package GCPlugins::GCbooks::GCBol; + +################################################### +# +# Copyright 2005-2006 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginBol; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq 'td') + { + if($self->{bookStep} == 0) + { + $self->{bookStep} = 1 ; + } + } + elsif ($tagname eq 'img') + { + if($self->{bookStep} == 1) + { + $self->{bookStep} = 2; + } + } + elsif ($tagname eq 'a') + { + if($self->{bookStep}==2) + { + $self->{url} = "http://www.bol.it" . $attr->{href}; + $self->{bookStep} = 3 ; + $self->{isTitle} = 1 ; + } + elsif($self->{bookStep}==3) + { + $self->{bookStep} = 4 ; + $self->{isAuthor} = 1 ; + } + } + elsif ($tagname eq 'br') + { + if($self->{bookStep}==4) + { + $self->{isBook} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{url}; + $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{title}; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{author}; + + $self->{isFormat} = 1 ; + #$self->{bookStep} = 0 ; + } + } + elsif ( + (($tagname ne 'h3') || ( ($tagname eq 'h3') && ($self->{bookStep} != 2) )) && + (($tagname ne 'p') || ( ($tagname eq 'p') && ($self->{bookStep} != 3) )) && + (($tagname ne 'span') || ( ($tagname eq 'span') && ($self->{bookStep} != 4) )) + ) + { + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isAnalyse} = 0; + $self->{isDescription} = 0; + $self->{isTranslator} = 0; + $self->{isCover} = 0; + $self->{isGenre} = 0; + $self->{isFormat} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + 
$self->{isISBN} = 0; + + $self->{isBook} = 0; + $self->{bookStep} = 0; + } + } + else + { + if (($tagname eq 'img') && ($attr->{class} eq 'cover')) + { + $self->{curInfo}->{cover} = "http://www.bol.it" . $attr->{src}; + $self->{bookStep} = 1; + } + elsif (($tagname eq 'h1') && ($self->{bookStep} == 1)) + { + $self->{curInfo}->{title} = "http://www.bol.it" . $attr->{src}; + $self->{isTitle} = 1; + $self->{bookStep} = 2; + } + elsif ($self->{bookStep} == 2) + { + if (($tagname eq 'a') && ($self->{areAuthors} == 0)) + { + $self->{isAuthor} = 1; + $self->{areAuthors} = 1; + } + if ($self->{areAuthors} == 1) + { + if ($tagname eq 'a') + { + $self->{isAuthor} = 1; + } + else + { + $self->{bookStep} = 3; + $self->{areAuthors} = 0; + } + } + } + elsif ($self->{bookStep} == 4) + { + if (($tagname eq 'a') && ($self->{areGenres} == 0)) + { + $self->{isGenre} = 1; + $self->{areGenres} = 1; + } + if ($self->{areGenres} == 1) + { + if ($tagname eq 'a') + { + $self->{isGenre} = 1; + } + else + { + $self->{bookStep} = 5; + $self->{areGenres} = 0; + } + } + } + elsif ($self->{bookStep} == 6) + { + if (($tagname eq 'a') && ($self->{areTranslators} == 0)) + { + $self->{isTranslator} = 1; + $self->{areTranslators} = 1; + } + if ($self->{areTranslators} == 1) + { + if ($tagname eq 'a') + { + $self->{isTranslator} = 1; + } + else + { + $self->{bookStep} = 6; + $self->{areTranslators} = 0; + } + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + $self->{title} = $origtext; + $self->{isTitle} = 0; + } + elsif ($self->{isAuthor}) + { + $self->{author} = $origtext; + $self->{isAuthor} = 0; + } + elsif ($self->{isFormat}) + { + my @array = split(/\|/,$origtext); + + $self->{itemsList}[$self->{itemIdx}]->{format} = $array[0]; + $self->{itemsList}[$self->{itemIdx}]->{format} =~ s/^\s+//; + $self->{isFormat} = 0; + $self->{isPublisher} = 
1; + } + elsif ($self->{isPublisher}) + { + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + $self->{isPublisher} = 0; + $self->{isPublication} = 1; + } + elsif ($self->{isPublication}) + { + my @array = split(/\|/,$origtext); + + $self->{itemsList}[$self->{itemIdx}]->{publication} = $array[1]; + $self->{itemsList}[$self->{itemIdx}]->{publication} =~ s/^\s+//; + $self->{isPublication} = 0; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + + if ($origtext eq 'I contenuti') + { + $self->{isDescription} = 1; + } + elsif ($origtext eq 'Formato:') + { + $self->{isFormat} = 1; + } + elsif (substr($origtext,0,7) eq 'Pagine:') + { + $self->{isPage} = 1; + } + elsif ($origtext eq 'Lingua:') + { + $self->{isLanguage} = 1; + } + elsif ($origtext eq 'Editore:') + { + $self->{isPublisher} = 1; + } + elsif ($origtext eq 'Anno di pubblicazione') + { + $self->{isPublication} = 1; + } + elsif ($origtext eq 'Codice EAN:') + { + $self->{isISBN} = 1; + } + elsif (($origtext eq 'Traduttore:') || ($origtext eq 'Traduttori:')) + { + $self->{bookStep} = 6; + } + elsif ($origtext eq 'Generi:') + { + $self->{bookStep} = 4; + } + elsif ($origtext ne '') + { + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + if ($self->{curInfo}->{authors} eq '') + { + $self->{curInfo}->{authors} = $origtext; + } + else + { + $self->{curInfo}->{authors} .= ", " . 
$origtext; + } + $self->{isAuthor} = 0 ; + } + elsif ($self->{isDescription}) + { + $self->{curInfo}->{description} = $origtext; + $self->{isDescription} = 0 ; + } + elsif ($self->{isFormat}) + { + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + elsif ($self->{isPage}) + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + elsif ($self->{isLanguage}) + { + $self->{curInfo}->{language} = $origtext; + $self->{isLanguage} = 0; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0; + } + elsif ($self->{isPublication}) + { + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0; + } + elsif ($self->{isISBN}) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + elsif ($self->{isGenre}) + { + if ($self->{curInfo}->{genre} eq '') + { + $self->{curInfo}->{genre} = $origtext; + } + else + { + $self->{curInfo}->{genre} .= ", " . $origtext; + } + $self->{isGenre} = 0 ; + } + elsif ($self->{isTranslator}) + { + if ($self->{curInfo}->{translator} eq '') + { + $self->{curInfo}->{translator} = $origtext; + } + else + { + $self->{curInfo}->{translator} .= ", " . 
$origtext; + } + $self->{isTranslator} = 0 ; + } + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 1, + edition => 1, + serie => 0, + }; + + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isAnalyse} = 0; + $self->{isDescription} = 0; + $self->{isTranslator} = 0; + $self->{isCover} = 0; + $self->{isGenre} = 0; + $self->{isFormat} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + $self->{isISBN} = 0; + $self->{areAuthors} = 0; + $self->{areGenres} = 0; + $self->{areTranslators} = 0; + + $self->{isBook} = 0; + $self->{bookStep} = 0; + $self->{title} = 0; + $self->{author} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|<br><i>|<i>|gi; + #$html =~ s/[\n\r\t]//g; + } + else + { + my $found = index($html,'<a name="commenti">'); + if ( $found >= 0 ) + { + $html = substr($html, 0, $found); + } + + $html =~ s|<u>||gi; + $html =~ s|<li>|\n* |gi; + #$html =~ s|<br>|\n|gi; + #$html =~ s|<br />|\n|gi; + #$html =~ s|<b>||gi; + #$html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<p>|\n|gi; + $html =~ s|</p>||gi; + $html =~ s|\x{92}|'|g; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + return ('http://www.bol.it/libri/ricerca', ["crc" => "100", "crcselect" => "100", "g" => "$word", "tpr" => "10"] ); + } + else + { + $word =~ s/\+/ /g; + return ('http://www.bol.it/libri/ricerca', ["crc" => "100", "crcselect" => "100", "g" => "$word", "tpr" => "10"] ); + } + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url if $url; + return 'http://www.bol.it'; + } + + sub getName + { + return "Bol"; + } + + sub 
getCharset + { + my $self = shift; + return "ISO-8859-15"; + } + + sub getAuthor + { + return 'TPF, UnclePetros'; + } + + sub getLang + { + return 'IT'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBuscape.pm b/lib/gcstar/GCPlugins/GCbooks/GCBuscape.pm new file mode 100644 index 0000000..ad46177 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCBuscape.pm @@ -0,0 +1,479 @@ +package GCPlugins::GCbooks::GCBuscape; + +################################################### +# +# Copyright 2005-2006 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginBuscape; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + + if (($tagname eq 'a') && ($attr->{class} eq 'xu')) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{isTitle} = 1 ; + } + elsif (( $attr->{class} eq 'xj') && ($self->{itemIdx} eq '-1') && ($self->{searchField} eq 'isbn')) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl}; + } + elsif (($tagname eq 'meta') && ($self->{itemIdx} eq '-1') && ($self->{searchField} eq 'isbn')) + { + my $html = $self->loadPage($self->{loadedUrl}, 0, 1); + my $found = index($html,"URL="); + if ( $found >= 0 ) + { + $html = substr($html, $found +length('URL='),length($html)- $found -length('URL=')); + $html = substr($html, 0, index($html,"\"")); + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $html; + } + } + } + else + { + if (( $attr->{class} eq 'xj') && ($self->{isAnalyse} eq 0)) + { + $self->{isAnalyse} = 1 ; + } + elsif (($tagname eq 'img') && ($attr->{onerror} ne '') && ($self->{curInfo}->{title} eq '')) + { + # Attention il y a 2 formats differents pour ce site + if ($attr->{alt} ne '') + { + $self->{curInfo}->{title} = $attr->{alt}; + } + if ($attr->{title} ne '') + { + my @array = split(/\(/,reverse($attr->{title})); + my @array2; + if ($array[1] ne '') + { + $self->{curInfo}->{isbn} = reverse($array[0]); + $self->{curInfo}->{isbn} =~ s/\)//; + # J enleve le premier champs 
qui est sense etre le code ISBN + shift(@array); + my $element1; + my $element2; + foreach $element1 (@array) + { + if ($element2 eq '') + { + $element2 = $element1; + } + else + { + $element2 .= "(" .$element1; + } + } + @array2 = split(/-/,$element2); + } + else + { + @array2 = split(/-/,$array[0]); + } + + if ($array2[1] ne '') + { + # J enleve le dernier champs qui est l auteur + shift(@array2); + } + my $element; + foreach $element (@array2) + { + if ($self->{curInfo}->{title} eq '') + { + $self->{curInfo}->{title} = $element; + } + else + { + $self->{curInfo}->{title} .= "-" .$element; + } + } + $self->{curInfo}->{title} = reverse($self->{curInfo}->{title}); + } + + $self->{curInfo}->{cover} = $attr->{src}; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + my @array = split(/\(/,reverse($origtext)); + my @array2; + if ($array[1] ne '') + { + # J enleve le premier champs qui est sense etre le code ISBN + shift(@array); + my $element1; + my $element2; + foreach $element1 (@array) + { + if ($element2 eq '') + { + $element2 = $element1; + } + else + { + $element2 .= "(" .$element1; + } + } + @array2 = split(/-/,$element2); + } + else + { + @array2 = split(/-/,$array[0]); + } + + if ($array2[1] ne '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = reverse($array2[0]); + my $found = index($self->{itemsList}[$self->{itemIdx}]->{authors}," Cod:"); + if ( $found >= 0 ) + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = substr($self->{itemsList}[$self->{itemIdx}]->{authors}, 0, $found); + } + # Enleve les blancs en debut de chaine + $self->{itemsList}[$self->{itemIdx}]->{authors} =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $self->{itemsList}[$self->{itemIdx}]->{authors} =~ s/\s+$//g; + shift(@array2); + } + my $element; + foreach $element (@array2) + { + if 
($self->{itemsList}[$self->{itemIdx}]->{title} eq '') + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $element; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{title} .= "-" .$element; + } + } + $self->{itemsList}[$self->{itemIdx}]->{title} = reverse($self->{itemsList}[$self->{itemIdx}]->{title}); + $self->{isTitle} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if ($self->{isAnalyse} eq 1) + { + if ($origtext =~ m/Autor/i) + { + $self->{isAuthor} = 1 ; + $self->{isAnalyse} = 2 ; + } + elsif ($origtext =~ m/Editora/i) + { + $self->{isPublisher} = 1 ; + $self->{isAnalyse} = 2 ; + } + elsif ($origtext =~ m/Ano de edi/i) + { + $self->{isPublication} = 1 ; + $self->{isAnalyse} = 2 ; + } + elsif ($origtext =~ m/N.* de p.*ginas/i) + { + $self->{isPage} = 1 ; + $self->{isAnalyse} = 2 ; + } + elsif ($origtext =~ m/ISBN/i) + { + $self->{isISBN} = 1 ; + $self->{isAnalyse} = 2 ; + } + elsif ($origtext =~ m/Encaderna/i) + { + $self->{isFormat} = 1 ; + $self->{isAnalyse} = 2 ; + } + else + { + $self->{isAnalyse} = 0 ; + } + + } + elsif ($self->{isAuthor} eq 1) + { + $self->{isAuthor} = 2 ; + } + elsif ($self->{isAuthor} eq 2) + { + if ($origtext =~ m/N.*o Cadastrado/i) + { + } + else + { + my @nom_prenom = split(/,/,$origtext); + # Enleve les blancs en debut de chaine + $nom_prenom[0] =~ s/^\s//; + $nom_prenom[1] =~ s/^\s//; + # Enleve les blancs en fin de chaine + $nom_prenom[0] =~ s/\s+$//; + $nom_prenom[1] =~ s/\s+$//; + if ($self->{curInfo}->{authors} eq '') + { + if ($nom_prenom[1] ne '') + { + $self->{curInfo}->{authors} = $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{curInfo}->{authors} = $nom_prenom[0]; + } + } + else + { + if ($nom_prenom[1] ne '') + { + $self->{curInfo}->{authors} .= ", " . $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{curInfo}->{authors} .= ", " . 
$nom_prenom[0]; + } + } + } + + $self->{isAuthor} = 0 ; + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isISBN} eq 1) + { + $self->{isISBN} = 2 ; + } + elsif ($self->{isISBN} eq 2) + { + $self->{curInfo}->{isbn} = $origtext if ( !($origtext =~ m/N.*o Cadastrado/i) && !($origtext =~ m/n.*o dispon.*vel/i)); + $self->{isISBN} = 0 ; + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isPublisher} eq 1) + { + $self->{isPublisher} = 2 ; + } + elsif ($self->{isPublisher} eq 2) + { + $self->{curInfo}->{publisher} = $origtext if ( !($origtext =~ m/N.*o Cadastrado/i) && !($origtext =~ m/n.*o dispon.*vel/i)); + $self->{isPublisher} = 0 ; + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isPublication} eq 1) + { + $self->{isPublication} = 2 ; + } + elsif ($self->{isPublication} eq 2) + { + $self->{curInfo}->{publication} = $origtext if ( !($origtext =~ m/N.*o Cadastrado/i) && !($origtext =~ m/n.*o dispon.*vel/i)); + $self->{isPublication} = 0 ; + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isPage} eq 1) + { + $self->{isPage} = 2 ; + } + elsif ($self->{isPage} eq 2) + { + $self->{curInfo}->{pages} = $origtext if ( !($origtext =~ m/N.*o Cadastrado/i) && !($origtext =~ m/n.*o dispon.*vel/i)); + $self->{isPage} = 0 ; + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isFormat} eq 1) + { + $self->{isFormat} = 2 ; + } + elsif ($self->{isFormat} eq 2) + { + $self->{curInfo}->{format} = $origtext if ( !($origtext =~ m/N.*o Cadastrado/i) && !($origtext =~ m/n.*o dispon.*vel/i)); + $self->{isFormat} = 0 ; + $self->{isAnalyse} = 0 ; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 0, + serie => 0, + }; + + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isAnalyse} = 0; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + $self->{isPage} = 0; + $self->{isISBN} = 0; + 
$self->{isFormat} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|<br><i>|<i>|gi; + } + else + { + my $found = index($html,'<a name="commenti">'); + if ( $found >= 0 ) + { + $html = substr($html, 0, $found); + } + + $html =~ s|<u>||gi; + $html =~ s|<li>|\n* |gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<p>|\n|gi; + $html =~ s|</p>||gi; + $html =~ s|\x{92}|'|g; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + + $html =~ s|<!--||gi; + $html =~ s|<strong>||gi; + $html =~ s|</strong>|<tpfnesertarien>TPFNESERTARIEN</tpfnesertarien><tpfnesertarien></tpfnesertarien>|gi; + + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + return "http://compare.buscape.com.br/proc_unico?id=3482&Carac1000000000=" .$word; + } + else + { + return "http://compare.buscape.com.br/proc_unico?id=3482&Carac1000000000=" .$word; + } + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + # Attention il y a 2 formats differents pour ce site + if ($url =~ m/counter_livro.asp/i) + { + my $html = $self->loadPage($url, 0, 1); + my $found = index($html,"URL="); + if ( $found >= 0 ) + { + $html = substr($html, $found +length('URL='),length($html)- $found -length('URL=')); + $html = substr($html, 0, index($html,"\"")); + } + return $html; + } + + return $url; + } + + sub getName + { + return "Buscape"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-1"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'PT'; + } + + sub getSearchFieldsArray + { + return ['ISBN', 'title']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCCasadelibro.pm b/lib/gcstar/GCPlugins/GCbooks/GCCasadelibro.pm new file mode 100644 index 0000000..5d8f7e4 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCCasadelibro.pm @@ -0,0 +1,420 
@@ +package GCPlugins::GCbooks::GCbooksCasadelibro; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginCasadelibro; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + + if (($tagname eq 'p') && ($attr->{class} eq 'tit')) + { + $self->{isBook} = 1 ; + $self->{isUrl} = 1 ; + } + elsif (($tagname eq 'a') && ($self->{isBook}) && ($self->{isUrl})) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.casadelibro.com" . 
$attr->{href}; + $self->{isUrl} = 0 ; + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'p') && ($attr->{class} eq 'liz')) + { + $self->{isBook} = 0 ; + } + elsif (($tagname eq 'span') && ($attr->{class} eq 'autor') && ($self->{isBook})) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{class} =~ m/autor/i) && ($self->{isBook})) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'p') && ($attr->{class} eq 'puestoEditorial') && ($self->{isBook})) + { + $self->{isEditionPublication} = 1 ; + } + } + else + { + if ($self->{isLanguage} eq 1) + { + $self->{isLanguage} = 2 ; + } + elsif ($self->{isEdition} eq 1) + { + $self->{isEdition} = 2 ; + } + elsif ($self->{isFormat} eq 1) + { + $self->{isFormat} = 2 ; + } + elsif ($self->{isSerie} eq 1) + { + $self->{isSerie} = 2 ; + } + elsif ($self->{isPublication} eq 1) + { + $self->{isPublication} = 2 ; + } + elsif ($self->{isISBN} eq 1) + { + $self->{isISBN} = 2 ; + } + elsif (($tagname eq 'span') && ($attr->{class} eq 'tit_ficha')) + { + $self->{isTitle} = 1 ; + } + elsif ($tagname eq 'tpfnoauthortpf') + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{class} eq 'autor2')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'edicion_ficha')) + { + $self->{isPublisher} = 1 ; + } + elsif ($tagname eq 'tpfstarttagtpf') + { + $self->{isAnalyse} = 1 ; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'txt_resumen')) + { + $self->{isDescription} = 1 ; + } + elsif (($tagname eq 'img') && ($attr->{id} eq 'imgFicha') && ($attr->{src} ne '/l/grande.gif')) + { + $self->{curInfo}->{cover} = "http://www.casadelibro.com" . 
$attr->{src} ; + } + + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + my @nom_prenom = split(/,/,$origtext); + # Enleve les blancs en debut de chaine + $nom_prenom[0] =~ s/^\s//; + $nom_prenom[1] =~ s/^\s//; + # Enleve les blancs en fin de chaine + $nom_prenom[0] =~ s/\s+$//; + $nom_prenom[1] =~ s/\s+$//; + if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '') + { + if ($nom_prenom[1] ne '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $nom_prenom[0]; + } + } + else + { + if ($nom_prenom[1] ne '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " . $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " . 
$nom_prenom[0]; + } + } + + $self->{isAuthor} = 0 ; + } + elsif ($self->{isEditionPublication}) + { + $_= $origtext; + if (/(.*),\s([0-9][0-9][0-9][0-9]$)/) + { + $self->{itemsList}[$self->{itemIdx}]->{edition} = $1; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + } + + $_= $origtext; + if (/(.*)\s([0-9][0-9][0-9][0-9]$)/) + { + $self->{itemsList}[$self->{itemIdx}]->{publication} = $2; + } + + $self->{isEditionPublication} = 0 ; + } + } + else + { + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0 ; + } + elsif ($self->{isLanguage} eq 2) + { + my @array = split(/:/,$origtext); + $self->{curInfo}->{language} = $array[1]; + $self->{curInfo}->{language} =~ s/^\s//; + $self->{curInfo}->{language} =~ s/\s+$//; + $self->{isLanguage} = 0 ; + } + elsif ($self->{isEdition} eq 2) + { + my @array = split(/:/,$origtext); + $self->{curInfo}->{edition} = $array[1]; + $self->{curInfo}->{edition} =~ s/^\s//; + $self->{curInfo}->{edition} =~ s/\s+$//; + $self->{isEdition} = 0 ; + } + elsif ($self->{isFormat} eq 2) + { + my @array = split(/:/,$origtext); + $self->{curInfo}->{format} = $array[1]; + $self->{curInfo}->{format} =~ s/^\s//; + $self->{curInfo}->{format} =~ s/\s+$//; + $self->{isFormat} = 0 ; + } + elsif ($self->{isSerie} eq 2) + { + my @array = split(/:/,$origtext); + $self->{curInfo}->{serie} = $array[1]; + $self->{curInfo}->{serie} =~ s/^\s//; + $self->{curInfo}->{serie} =~ s/\s+$//; + $self->{isSerie} = 0 ; + } + elsif ($self->{isPublication} eq 2) + { + $self->{curInfo}->{publication} = $origtext; + $self->{curInfo}->{publication} =~ s/^\s//; + $self->{curInfo}->{publication} =~ s/\s+$//; + $self->{isPublication} = 0 ; + } + elsif ($self->{isAnalyse}) + { + $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i); + $self->{isLanguage} = 1 if ($origtext =~ m/Lengua/i); + $self->{isEdition} = 1 
if ($origtext =~ m/^n(.*)\sEdici/i); + $self->{isFormat} = 1 if ($origtext =~ m/Encuadernaci/i); + $self->{isSerie} = 1 if ($origtext =~ m/Colecci/i); + $self->{isPublication} = 1 if ($origtext =~ m/^A(.*)o de Edici/i); + + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isAuthor}) + { + my @nom_prenom = split(/,/,$origtext); + # Enleve les blancs en debut de chaine + $nom_prenom[0] =~ s/^\s//; + $nom_prenom[1] =~ s/^\s//; + # Enleve les blancs en fin de chaine + $nom_prenom[0] =~ s/\s+$//; + $nom_prenom[1] =~ s/\s+$//; + if ($nom_prenom[1] ne '') + { + $self->{curInfo}->{authors} .= $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{curInfo}->{authors} .= $nom_prenom[0]; + } + $self->{curInfo}->{authors} .= ","; + + $self->{isAuthor} = 0 ; + } + elsif ($self->{isISBN} eq 2) + { + my @array = split(/:/,$origtext); + $self->{curInfo}->{isbn} = $array[1]; + $self->{curInfo}->{isbn} =~ s/^\s//; + $self->{isISBN} = 0 ; + } + elsif ($self->{isDescription}) + { + $self->{curInfo}->{description} = $origtext; + $self->{curInfo}->{description} =~ s/\t//g; + $self->{curInfo}->{description} =~ s/^\s//; + $self->{curInfo}->{description} =~ s/\s+$//; + $self->{isDescription} = 0 ; + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 0, + edition => 1, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditionPublication} = 0 ; + $self->{isAnalyse} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isLanguage} = 0; + $self->{isEdition} = 0; + $self->{isFormat} = 0; + $self->{isSerie} = 0; + $self->{isPublication} = 0; + $self->{isISBN} = 0; + $self->{isDescription} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|'| |gi; + } + else + { + my $found = 
index($html,"<div class=\"azul3\">"); + if ( $found >= 0 ) + { + $html = substr($html, 0, $found); + } + + $html =~ s|<li>|\n* |gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|"tit_ficha"><strong>|"tit_ficha">|gi; + $html =~ s|de </span>|<TPFNOAUTHORTPF>|gi; + $html =~ s|<strong>|<TPFSTARTTAGTPF>|gi; + $html =~ s|</strong>|<TPFSTOPTAGTPF>|gi; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + + return "http://www.casadellibro.com/busquedas/resultados2?titbus=&autorbus=&isbnbus=" . $word. "&editbus=&idibus=0&encbus=0&sl1=-1"; +# return "http://www.casadellibro.com/busquedas/quickResults/0,,1-i-" . $word. ",00.html?tBusq=t&tValueForSearch=" .$word. "&cFo=true&rOd=&NotQueryAgain=false"; + } + else + { + my $word2 = $word; + $word2 =~ s|\+|%20|gi; + return "http://www.casadellibro.com/busquedas/quickResults2/0,," . $word2. ",00.html?Buscar=" .$word; + } + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url if $url; + return 'http://www.casadellibro.com/'; + } + + sub getName + { + return "Casadelibro"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'ES'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCChapitre.pm b/lib/gcstar/GCPlugins/GCbooks/GCChapitre.pm new file mode 100644 index 0000000..242871c --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCChapitre.pm @@ -0,0 +1,430 @@ +package GCPlugins::GCbooks::GCChapitre;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginChapitre;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+
+ # Start-tag callback (presumably wired up as the HTML::Parser "start"
+ # handler by the base class -- TODO confirm).
+ #
+ # It never stores text itself: it only counts open tags in $self->{inside}
+ # and raises the is* flags that the text() callback consumes.
+ #
+ #   $tagname  lower-case tag name, including the synthetic tpf*tpf tags
+ #             that preProcess() injects into the page
+ #   $attr     hash reference of the tag attributes
+ #   $attrseq, $origtext  accepted for the callback signature, unused here
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+
+     if ($self->{parsingList})
+     {
+         # ---- search-results page ----
+         if ($tagname eq 'a')
+         {
+             # A result-title link opens a new entry in the items list.
+             if ($attr->{id} =~ m/ctl00_PHCenter_SearchResult1_rpResult_ctl.._searchResultTitle_hlProduct/)
+             {
+                 $self->{itemIdx}++;
+                 my $position = $self->{itemIdx};
+                 $self->{itemsList}[$position]->{url} = "http://www.chapitre.com" . $attr->{href};
+                 $self->{isTitle} = 1 ;
+             }
+         }
+         elsif ($tagname eq 'tpfauthortpf')
+         {
+             $self->{isAuthor} = 1 ;
+         }
+         elsif ($tagname eq 'strong')
+         {
+             # The next text node is a label ("Editeur :", "Date :", ...).
+             $self->{isAnalyse} = 1 ;
+         }
+         return;
+     }
+
+     # ---- item (detail) page ----
+
+     # While author names are being collected, any tag other than a link
+     # ends the collection; nothing else is examined for this tag.
+     if ($self->{isAuthor} eq 2)
+     {
+         $self->{isAuthor} = 0 if ($tagname ne 'a');
+         return;
+     }
+
+     if ($tagname eq 'div')
+     {
+         if ($attr->{class} eq 'clear')
+         {
+             $self->{isDescription} = 0 ;
+         }
+         elsif ($attr->{id} eq 'ctl00_PHCenter_ProductFile1_ProductTitle1_pnlAuthor')
+         {
+             $self->{isAuthor} = 1 ;
+         }
+         elsif ($attr->{id} eq 'ctl00_PHCenter_ProductFile1_ProductTitle1_pnlTranslator')
+         {
+             $self->{isTranslator} = 1 ;
+         }
+         elsif ($attr->{class} eq 'presentation')
+         {
+             $self->{isDescription} = 1 ;
+         }
+     }
+     elsif ($tagname eq 'td')
+     {
+         # A value cell follows a label cell: promote the first pending
+         # field (state 1) to "capture the next text" (state 2).
+         foreach my $flag (qw(isPublisher isPublication isISBN isLanguage isCollection isGenre))
+         {
+             next unless ($self->{$flag} eq 1);
+             $self->{$flag} = 2 ;
+             last;
+         }
+     }
+     elsif ($tagname eq 'a')
+     {
+         if ($attr->{id} eq 'ctl00_PHCenter_ProductFile1_ProductTitle1_linkTitleProduct')
+         {
+             $self->{isTitle} = 1 ;
+         }
+         elsif ($attr->{href} =~ m|/CHAPITRE/fr/search/Default.aspx\?collection=|i)
+         {
+             $self->{isCollection} = 2 ;
+         }
+         elsif ($attr->{href} =~ m|/CHAPITRE/fr/search/Default.aspx\?themeId=|i)
+         {
+             $self->{isGenre} = 2 ;
+         }
+     }
+     elsif ($tagname eq 'h2')
+     {
+         $self->{isAuthor} = 2 if ($self->{isAuthor} eq 1);
+     }
+     elsif ($tagname eq 'tpftraducteurtpf')
+     {
+         $self->{isTranslator} = 2 if ($self->{isTranslator} eq 1);
+     }
+     elsif ($tagname eq 'img')
+     {
+         # Keep the cover unless its URL starts with the "indispo" prefix.
+         if (($attr->{id} eq 'ctl00_PHCenter_ProductFile1_ProductPicture1_imgProduct')
+             && (index($attr->{src},"http://images.chapitre.com/indispo") eq -1))
+         {
+             $self->{curInfo}->{cover} = $attr->{src};
+         }
+     }
+     elsif ($tagname eq 'tpfdescriptiontpf')
+     {
+         $self->{isDescription} = 2 if ($self->{isDescription} eq 1);
+     }
+     elsif ($tagname eq 'th')
+     {
+         # The next text node is a label ("Editeur", "Langue", ...).
+         $self->{isAnalyse} = 1 ;
+     }
+     return;
+ }
+
+ # End-tag callback: decrements the open-tag counter for $tagname that
+ # start() incremented.
+ sub end
+ {
+     my $self = shift;
+     my ($tagname) = @_;
+
+     $self->{inside}{$tagname}--;
+ }
+
+ # Rewrites one "Surname, Forename" fragment as "Forename Surname".
+ # A fragment without a comma (or with an empty forename) is returned as
+ # the trimmed surname alone; fields after the second one are ignored.
+ # Returns '' when nothing but whitespace is left.
+ sub _formatAuthorName
+ {
+     my ($fragment) = @_;
+
+     my ($surname, $forename) = split(/,/, $fragment);
+     foreach my $part ($surname, $forename)
+     {
+         $part = '' unless defined $part;
+         # Strip ALL leading and trailing whitespace (the former code only
+         # removed a single leading character).
+         $part =~ s/^\s+//;
+         $part =~ s/\s+$//;
+     }
+
+     return ($forename ne '') ? $forename . " " . $surname : $surname;
+ }
+
+ # Text callback: stores $origtext in whichever field the is* flags raised
+ # by start() currently designate, then lowers the flag.
+ #
+ # On a results page ($self->{parsingList} true) values are written to the
+ # current entry of $self->{itemsList}; on an item page they are written to
+ # $self->{curInfo}.
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     if ($self->{parsingList})
+     {
+         if ($self->{isTitle})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+             $self->{isTitle} = 0 ;
+         }
+         elsif ($self->{isAuthor})
+         {
+             # Strip leading whitespace
+             $origtext =~ s/^\s+//;
+             # Strip trailing whitespace
+             $origtext =~ s/\s+$//g;
+             if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '')
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext;
+             }
+             else
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', ';
+                 $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext;
+             }
+             $self->{isAuthor} = 0 ;
+         }
+         elsif ($self->{isAnalyse})
+         {
+             # The text is a label: remember which value comes next.
+             $self->{isPublisher} = 1 if ($origtext =~ m/Editeur :/i);
+             $self->{isSerie} = 1 if ($origtext =~ m/Collection :/i);
+             $self->{isPublication} = 1 if ($origtext =~ m/Date :/i);
+
+             $self->{isAnalyse} = 0 ;
+         }
+         elsif ($self->{isPublisher})
+         {
+             # Only the first line of the text node is kept.
+             my @array = split(/\n/,$origtext);
+             $self->{itemsList}[$self->{itemIdx}]->{edition} = $array[0];
+             $self->{isPublisher} = 0 ;
+         }
+         elsif ($self->{isPublication})
+         {
+             my @array = split(/\n/,$origtext);
+             $self->{itemsList}[$self->{itemIdx}]->{publication} = $array[0];
+             $self->{isPublication} = 0 ;
+         }
+         elsif ($self->{isSerie})
+         {
+             my @array = split(/\n/,$origtext);
+             $self->{itemsList}[$self->{itemIdx}]->{serie} = $array[0];
+             $self->{isSerie} = 0 ;
+         }
+     }
+     else
+     {
+         # Strip leading whitespace
+         $origtext =~ s/^\s+//;
+         # Strip trailing whitespace
+         $origtext =~ s/\s+$//g;
+         if ($self->{isTitle})
+         {
+             $self->{curInfo}->{title} = $origtext;
+             $self->{isTitle} = 0 ;
+         }
+         elsif ($self->{isAuthor} eq 2)
+         {
+             # Whitespace-only text nodes are skipped without lowering the
+             # flag, so the first non-empty node is the one consumed.
+             if ( $origtext ne '')
+             {
+                 # Authors are separated by ';', each one written as
+                 # "Surname, Forename".
+                 foreach my $fragment (split(/;/,$origtext))
+                 {
+                     my $name = _formatAuthorName($fragment);
+                     # An empty fragment ("A;;B") must not add a dangling
+                     # ", " separator.
+                     next if ($name eq '');
+                     if ($self->{curInfo}->{authors} eq '')
+                     {
+                         $self->{curInfo}->{authors} = $name;
+                     }
+                     else
+                     {
+                         $self->{curInfo}->{authors} .= ", " . $name;
+                     }
+                 }
+                 $self->{isAuthor} = 0 ;
+             }
+         }
+         elsif ($self->{isTranslator} eq 2)
+         {
+             $self->{curInfo}->{translator} = $origtext;
+             $self->{isTranslator} = 0 ;
+         }
+         elsif ($self->{isPublisher} eq 2)
+         {
+             $self->{curInfo}->{publisher} = $origtext;
+             $self->{isPublisher} = 0 ;
+         }
+         elsif ($self->{isDescription} eq 2)
+         {
+             $self->{curInfo}->{description} = $origtext;
+             $self->{isDescription} = 0 ;
+         }
+         elsif ($self->{isPublication} eq 2)
+         {
+             $self->{curInfo}->{publication} = $origtext;
+             $self->{isPublication} = 0 ;
+         }
+         elsif ($self->{isAnalyse})
+         {
+             # The text is a table label: arm the matching field (state 1);
+             # start() promotes it to state 2 on the following <td>.
+             $self->{isPublication} = 1 if ($origtext =~ m/parution/i);
+             $self->{isISBN} = 1 if ($origtext =~ m/EAN13/i);
+             $self->{isPublisher} = 1 if ($origtext =~ m/Editeur/i);
+             $self->{isLanguage} = 1 if ($origtext =~ m/Langue/i);
+             $self->{isCollection} = 1 if ($origtext =~ m/Collection/i);
+             $self->{isGenre} = 1 if ($origtext =~ m/Genre/i);
+
+             $self->{isAnalyse} = 0 ;
+         }
+         elsif ($self->{isISBN} eq 2)
+         {
+             $self->{curInfo}->{isbn} = $origtext;
+             $self->{isISBN} = 0 ;
+         }
+         elsif ($self->{isLanguage} eq 2)
+         {
+             $self->{curInfo}->{language} = $origtext;
+             $self->{isLanguage} = 0 ;
+         }
+         elsif ($self->{isCollection} eq 2)
+         {
+             $self->{curInfo}->{serie} = $origtext;
+             $self->{isCollection} = 0 ;
+         }
+         elsif ($self->{isGenre} eq 2)
+         {
+             # Genres arrive separated by '/', stored separated by ','.
+             $origtext =~ s|/|,|gi;
+             $self->{curInfo}->{genre} = $origtext;
+             $self->{isGenre} = 0 ;
+         }
+     }
+ }
+
+ # Constructor: builds the object through the parent class, declares which
+ # fields are filled for each search result (hasField) and lowers every
+ # parser state flag.
+ sub new
+ {
+     my ($proto) = @_;
+     my $class = ref($proto) ? ref($proto) : $proto;
+     my $self = $class->SUPER::new();
+     bless ($self, $class);
+
+     my %listFields = (
+         title => 1,
+         authors => 1,
+         publication => 1,
+         format => 0,
+         edition => 1,
+         serie => 1,
+     );
+     $self->{hasField} = \%listFields;
+
+     # State flags shared by the start() and text() callbacks.
+     foreach my $flag (qw(
+         isTitle isAuthor isPublisher isSerie isPublication isAnalyse
+         isDescription isISBN isLanguage isCollection isTranslator isGenre
+     ))
+     {
+         $self->{$flag} = 0;
+     }
+
+     return $self;
+ }
+
+ # Massages the raw page before it is parsed and returns the result.
+ #
+ # Inline formatting tags are dropped or turned into newlines, a few
+ # typographic characters are replaced by ASCII equivalents, and synthetic
+ # marker tags (tpfauthortpf, tpftraducteurtpf, tpfdescriptiontpf) are
+ # inserted so that start() has something to react to.
+ # Rules are applied globally, in the order listed.
+ sub preProcess
+ {
+     my ($self, $html) = @_;
+
+     my @rules;
+     if ($self->{parsingList})
+     {
+         @rules = (
+             [ qr|<b>|i,   '' ],
+             [ qr|</b>|i,  '' ],
+             [ qr|</a>,|i, '</a>,<tpfauthortpf>' ],
+         );
+     }
+     else
+     {
+         @rules = (
+             [ qr|</strong>|i, '</strong><tpftraducteurtpf>' ],
+             [ qr|</h3>|i,     '</h3><tpfdescriptiontpf>' ],
+
+             [ qr|<u>|i,     '' ],
+             [ qr|<li>|i,    "\n* " ],
+             [ qr|<br>|i,    "\n" ],
+             [ qr|<br />|i,  "\n" ],
+             [ qr|<b>|i,     '' ],
+             [ qr|</b>|i,    '' ],
+             [ qr|<i>|i,     '' ],
+             [ qr|</i>|i,    '' ],
+             [ qr|<p>|i,     "\n" ],
+             [ qr|</p>|i,    '' ],
+             [ qr|\x{92}|,   "'" ],
+             [ qr|’|i,       "'" ],
+             [ qr|•|i,       '*' ],
+             [ qr|…|i,       '...' ],
+             [ qr|\x{85}|i,  '...' ],
+             [ qr|\x{8C}|i,  'OE' ],
+             [ qr|\x{9C}|i,  'oe' ],
+         );
+     }
+
+     foreach my $rule (@rules)
+     {
+         my ($pattern, $replacement) = @{$rule};
+         $html =~ s/$pattern/$replacement/g;
+     }
+
+     return $html;
+ }
+
+ # Builds the search request for $word.
+ # Returns a two-element list: the search page URL and an array reference
+ # holding the "quicksearch" form field (presumably sent as POST data by
+ # the caller -- TODO confirm). '+' characters in $word become spaces.
+ sub getSearchUrl
+ {
+     my $self = shift;
+     my $word = shift;
+
+     $word =~ tr/+/ /;
+     my @formFields = ("quicksearch" => "$word");
+
+     return ('http://www.chapitre.com/CHAPITRE/fr/search/Default.aspx?search=true', \@formFields);
+ }
+
+ # Item pages are fetched from the URL collected on the results page, so
+ # $url is handed back unchanged.
+ sub getItemUrl
+ {
+     my $self = shift;
+     my $url = shift;
+
+     return $url;
+ }
+
+ # Name of this plugin.
+ sub getName { return "Chapitre.com"; }
+
+ # Character set name reported for this plugin.
+ sub getCharset
+ {
+     my ($self) = @_;
+
+     return "ISO-8859-15";
+ }
+
+ # Identifier of the plugin's author.
+ sub getAuthor { return 'TPF'; }
+
+ # Language code reported for this plugin.
+ sub getLang { return 'FR'; }
+
+ # Returns a reference to a fresh array naming the search fields this
+ # plugin declares.
+ sub getSearchFieldsArray
+ {
+     my @searchFields = ('ISBN', 'title');
+
+     return \@searchFields;
+ }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCDoubanbook.pm b/lib/gcstar/GCPlugins/GCbooks/GCDoubanbook.pm new file mode 100644 index 0000000..927e099 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCDoubanbook.pm @@ -0,0 +1,238 @@ +package GCPlugins::GCbooks::GCDoubanbook; + +################################################### +# +# Copyright 2005-2010 Bai Wensimi +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginDoubanbook; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use XML::Simple; + use Encode; + use LWP::Simple qw($ua); + + sub parse + { + my ($self, $page) = @_; + return if (($page =~ /^bad isbn/) & ($page =~ /^The/)); + my $xml; + my $xs = XML::Simple->new; + + if ($self->{parsingList}) + { + if ($page =~ /feed>$/) + { + $xml = $xs->XMLin( + $page, + forceArray=>['author'], + KeyAttr => [''] + ); + foreach my $ItemBook ( @{$xml->{'entry'}}){ + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{'url'} = $ItemBook->{'id'}; + $self->{itemsList}[ $self->{itemIdx} ]->{'title'} = $ItemBook->{'title'}; + foreach my $tmp_author (@{$ItemBook->{'author'}}){ + {($self->{itemsList}[ $self->{itemIdx} 
]->{'authors'} ne '' ) and $self->{itemsList}[ $self->{itemIdx} ]->{'authors'}.=',';} + $self->{itemsList}[ $self->{itemIdx} ]->{'authors'}.=$tmp_author->{'name'}; + } + } + } + else + { + $xml = $xs->XMLin( + $page, + forceArray=>['author'], + KeyAttr => [''] + ); + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{'url'} = $xml->{'id'}; + $self->{itemsList}[ $self->{itemIdx} ]->{'title'} = $xml->{'title'}; + foreach my $tmp_author (@{$xml->{'author'}}){ + $self->{itemsList}[ $self->{itemIdx} ]->{'authors'}.=$tmp_author->{'name'}; + $self->{itemsList}[ $self->{itemIdx} ]->{'authors'}.=','; + } + } + } + else + { + $xml =$xs->XMLin($page, + ForceArray => [ 'author' ], + KeyAttr => {'db:tag'=>'name','link'=>'rel'}); + foreach my $tmp_author (@{$xml->{'author'}}){ + $self->{curInfo}->{authors}.=$tmp_author->{'name'}; + $self->{curInfo}->{authors}.=','; + } + $self->{curInfo}->{title}=$xml->{'title'}; + $self->{curInfo}->{description}=$xml->{'summary'}; + $self->{curInfo}->{web}=$xml->{'link'}->{'alternate'}->{'href'}; + foreach my $check(@{$xml->{'db:attribute'}}){ + my $db_attr=$check->{'name'}; + SWITCH: { + $db_attr eq 'publisher' and $self->{curInfo}->{publisher}=$check->{'content'} ,last; + $db_attr eq 'pubdate' and $self->{curInfo}->{publication}=$check->{'content'} ,last; + $db_attr eq 'pages' and $self->{curInfo}->{pages}=$check->{'content'} ,last; + $db_attr eq 'isbn13' and $self->{curInfo}->{isbn}=$check->{'content'} ,last; + $db_attr eq 'binding' and $self->{curInfo}->{format}=$check->{'content'} ,last; + $db_attr eq 'translator' and { ($self->{curInfo}->{translator} ne '' ) and $self->{curInfo}->{translator}.=',' }, $self->{curInfo}->{translator}.=$check->{'content'} ,last; + $db_attr eq 'author-intro' and $self->{curInfo}->{description}.="\n\n".$check->{'content'},last; + ; + } + } + + my $tmp_image=$xml->{'link'}->{'image'}->{'href'}; + $tmp_image =~ s/spic/lpic/; + $self->{curInfo}->{cover}=$tmp_image; + } + } + + sub new + { + my $proto = 
shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + }; + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + if ($self->{searchField} eq 'isbn') + { + return "http://api.douban.com/book/subject/isbn/" .$word; + } + else + { + return "http://api.douban.com/book/subjects?q=" .$word; + } + + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url; + } + + sub changeUrl + { + my ($self, $url) = @_; + # Make sure the url is for the api, not the main movie page + return $self->getItemUrl($url); + } + + sub getNumberPasses + { + return 1; + } + + sub getName + { + return "豆瓣"; + } + + + sub testURL + { + my ($self, $url) = @_; + $url =~ /[\?&]lid=([0-9]+)*/; + my $id = $1; + return ($id == $self->siteLanguageCode()); + } + + sub getReturnedFields + { + my $self = shift; + + $self->{hasField} = { + title => 1, + authors => 1, + }; + } + + sub getAuthor + { + return 'BW'; + } + + sub getLang + { + return 'ZH'; + } + + sub isPreferred + { + return 1; + } + + sub getSearchCharset + { + my $self = shift; + + # Need urls to be double character encoded + return "UTF-8"; + } + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + + sub getCharset + { + my $self = shift; + + return "UTF-8"; + } + + sub decodeEntitiesWanted + { + return 0; + } + + sub siteLanguage + { + my $self = shift; + + return 'ZH'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCFnac.pm b/lib/gcstar/GCPlugins/GCbooks/GCFnac.pm new file mode 100644 index 0000000..9c0e804 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCFnac.pm @@ -0,0 +1,462 @@ +package GCPlugins::GCbooks::GCFnac; + +################################################### +# +# Copyright 2005-2006 Tian +# +# This file is part of GCstar. 
+# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginFnac; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + return if $self->{isFound}; + if (($tagname eq 'h3') && ($attr->{class} eq 'hStyle1')) + { + $self->{isFound} = 1 ; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl}; + return; + } + elsif ($tagname eq 'td') + { + if (($attr->{width} eq '254') && (!exists $attr->{bgcolor})) + { + $self->{isBook} = 1 ; + $self->{isUrl} = 1 ; + $self->{isColonne} = 0 ; + } + else + { + $self->{isColonne} ++ ; + $self->{isTitle} = 2 ; + } + } + elsif (($attr->{class} eq 'subTitre') && (!exists $attr->{color}) && ($self->{isTitle} eq '0')) + { + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'tpfpublicationtpf') && ($self->{isBook})) + { + $self->{isPublication} = 1 ; + } + elsif (($tagname eq 'a') && ($self->{isBook})) + { + if ($attr->{href} =~ m|/advanced/book.do\?category=book|i) + { + $self->{isBook} = 0 ; + $self->{isUrl} = 0 ; + } + elsif ($self->{isUrl}) + { + 
$self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{isTitle} = 1 ; + $self->{isUrl} = 0 ; + } + elsif ($self->{isColonne} eq 2) + { + $self->{isAuthor} = 1 ; + } + elsif ($self->{isColonne} eq 4) + { + $self->{isPublisher} = 1 ; + } + } + } + else + { + if ($tagname eq 'tr') + { + $self->{isAuthor} = 0 ; + $self->{isISBN} = 0 ; + $self->{isPublisher} = 0 ; + $self->{isFormat} = 0 ; + $self->{isSerie} = 0 ; + $self->{isPublication} = 0 ; + $self->{isPage} = 0 ; + $self->{isTranslator} = 0 ; + } + elsif ($self->{isAuthor} eq 1) + { + $self->{isAuthor} = 2 ; + } + elsif ($self->{isISBN} eq 1) + { + $self->{isISBN} = 2 ; + } + elsif ($self->{isPublisher} eq 1) + { + $self->{isPublisher} = 2 ; + } + elsif ($self->{isFormat} eq 1) + { + $self->{isFormat} = 2 ; + } + elsif ($self->{isSerie} eq 1) + { + $self->{isSerie} = 2 ; + } + elsif ($self->{isPublication} eq 1) + { + $self->{isPublication} = 2 ; + } + elsif ($self->{isPage} eq 1) + { + $self->{isPage} = 2 ; + } + elsif ($self->{isTranslator} eq 1) + { + $self->{isTranslator} = 2 ; + } + elsif (($tagname eq 'h3') && ($attr->{class} eq 'hStyle1')) + { + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'strong') && (($self->{isTitle}) || $attr->{class} eq 'titre dispeblock')) + { + $self->{isTitle} = 2 ; + } + elsif (($tagname eq 'th') && ($attr->{scope} eq 'row')) + { + $self->{isAnalyse} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{class} eq 'expandimg') && ($self->{bigPics})) + { + $self->{curInfo}->{cover} = $attr->{href} ; + } + elsif (($attr->{class} eq 'activeimg') && ((!$self->{bigPics}) || ($self->{curInfo}->{cover} eq ''))) + { + $self->{isCover} = 1 ; + } + elsif (($tagname eq 'img') && ($self->{isCover})) + { + $self->{curInfo}->{cover} = $attr->{src} ; + $self->{isCover} = 0 ; + } + elsif (($tagname eq 'div') && ($attr->{class} =~ /^lireLaSuite/)) + { + $self->{isDescription} = 1 ; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{isFound} = 0 
; + $self->{inside}->{$tagname}--; + $self->{isDescription} = 0 if $tagname eq 'div'; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle} eq 1) + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if (($self->{itemsList}[$self->{itemIdx}]->{title} eq '') && ($origtext ne '')) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + } + elsif ($origtext ne '') + { + $self->{itemsList}[$self->{itemIdx}]->{title} .= ' - '; + $self->{itemsList}[$self->{itemIdx}]->{title} .= $origtext; + } + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if (($self->{itemsList}[$self->{itemIdx}]->{authors} eq '') && ($origtext ne '')) + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext; + } + elsif ($origtext ne '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', '; + $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext; + } + $self->{isAuthor} = 0 ; + } + elsif ($self->{isPublisher}) + { + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + $self->{isPublisher} = 0 ; + } + elsif ($self->{isPublication}) + { + $self->{itemsList}[$self->{itemIdx}]->{publication} = $origtext; + $self->{isPublication} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if ($self->{isTitle} eq '2') + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAnalyse}) + { + $self->{isAuthor} = 1 if ($origtext =~ m/Auteur/i); + $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i); + $self->{isPublisher} = 1 if ($origtext =~ m/Editeur/i); + $self->{isFormat} = 1 if ($origtext =~ m/Format/i); + $self->{isSerie} = 1 if ($origtext =~ 
m/Collection/i); + $self->{isPublication} = 1 if ($origtext =~ m/Date de parution/i); + $self->{isPage} = 1 if ($origtext =~ m/pages/i); + $self->{isTranslator} = 1 if ($origtext =~ m/Traduction/i); + + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isAuthor} eq 2) + { + # Enleve les virgules + $origtext =~ s/,//; + if ($origtext ne '') + { + $self->{curInfo}->{authors} .= $origtext; + $self->{curInfo}->{authors} .= ","; + } + } + elsif ($self->{isISBN} eq 2) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0 ; + } + elsif ($self->{isPublisher} eq 2) + { + if ($origtext ne '') + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0 ; + } + } + elsif ($self->{isFormat} eq 2) + { + if ($origtext ne '') + { + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0 ; + } + } + elsif ($self->{isSerie} eq 2) + { + if ($origtext ne '') + { + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0 ; + } + } + elsif ($self->{isPublication} eq 2) + { + if ($origtext ne '') + { + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0 ; + } + } + elsif (($self->{isPage} eq 2)) + { + if ($origtext ne '') + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0 ; + } + } + elsif ($self->{isTranslator}) + { + if ($origtext ne '') + { + $self->{curInfo}->{translator} = $origtext; + $self->{isTranslator} = 0 ; + } + } + elsif ($self->{isDescription}) + { + $self->{curInfo}->{description} .= $origtext; + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 0, + edition => 1, + serie => 0, + }; + + $self->{isFound} = 0; + $self->{isColonne} = 0; + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isPublication} = 0; + 
$self->{isFormat} = 0; + $self->{isSerie} = 0; + $self->{isPage} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + $self->{isTranslator} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|</a><br>|</a><tpfpublicationtpf>|gmi; + } + else + { + # Le descriptif pouvant contenir des balises html je le repere maintenant + my $found = index($html,"<strong>Mot de l'"); + if ( $found >= 0 ) + { + my $html2 = substr($html, $found +length('<strong>Mot de l\''),length($html)- $found -length('<strong>Mot de l\'')); + my $found2 = index($html2,"<h4 "); + my $html3 = $html2; + if ( $found2 >= 0 ) + { + $html3 = substr($html2, $found2 +length('<h4 '),length($html2)- $found2 -length('<h4 ')); + $html2 = substr($html2, 0, $found2); + } + + $found2 = index($html2,"</strong>"); + if ( $found2 >= 0 ) + { + $html2 = substr($html2, $found2 +length('</strong>'),length($html2)- $found2 -length('</strong>')); + } + + $html2 =~ s|<li>|\n* |gi; + $html2 =~ s|<br>|\n|gi; + $html2 =~ s|<br />|\n|gi; + $html2 =~ s|<b>||gi; + $html2 =~ s|</b>||gi; + $html2 =~ s|<i>||gi; + $html2 =~ s|</i>||gi; + $html2 =~ s|<p>|\n|gi; + $html2 =~ s|</p>||gi; + $html2 =~ s|</h4>||gi; + $html2 =~ s|\x{92}|'|g; + $html2 =~ s|’|'|gi; + $html2 =~ s|•|*|gi; + $html2 =~ s|…|...|gi; + $html2 =~ s|\x{85}|...|gi; + $html2 =~ s|\x{8C}|OE|gi; + $html2 =~ s|\x{9C}|oe|gi; + + } + + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www3.fnac.com/search/quick.do?filter=-3&text=". 
$word ."&category=book"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url if $url; + return 'http://www.fnac.com/'; + } + + sub getName + { + return "Fnac (FR)"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-15"; +# return "UTF-8"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'FR'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCFnacPT.pm b/lib/gcstar/GCPlugins/GCbooks/GCFnacPT.pm new file mode 100644 index 0000000..eb119a7 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCFnacPT.pm @@ -0,0 +1,390 @@ +package GCPlugins::GCbooks::GCFnacPT; + +################################################### +# +# Copyright 2005-2006 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+    # Book plugin for the Portuguese Fnac site (www.fnac.pt).
+    #
+    # The plugin is a tag-driven state machine: start() raises flags when it
+    # sees the markup that announces a field, and text() stores the next text
+    # chunk into the field whose flag is raised. $self->{parsingList} selects
+    # between the search-result page (itemsList) and the item page (curInfo).
+    package GCPlugins::GCbooks::GCPluginFnacPT;
+
+    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+    use URI::Escape;
+
+    # Splits an author string such as "Last, First & Last2, First2" on '&'
+    # and returns the list of names rewritten as "First Last".
+    # Blanks around each part are removed; a name without a first name is
+    # returned as the last name alone; empty entries are skipped.
+    sub _formatAuthorNames
+    {
+        my ($text) = @_;
+
+        my @names;
+        foreach my $entry (split(/&/, $text))
+        {
+            my ($last, $first) = split(/,/, $entry);
+            $last  = '' if !defined $last;
+            $first = '' if !defined $first;
+            # Strip leading and trailing blanks
+            $last  =~ s/^\s+//;
+            $last  =~ s/\s+$//;
+            $first =~ s/^\s+//;
+            $first =~ s/\s+$//;
+
+            my $name = join(' ', grep { $_ ne '' } ($first, $last));
+            push @names, $name if $name ne '';
+        }
+
+        return @names;
+    }
+
+    # Opening-tag callback.
+    #   $tagname - name of the tag
+    #   $attr    - hash ref of the tag attributes
+    # ($attrseq and $origtext are accepted but not used here.)
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            if (($tagname eq 'a') && ($attr->{class} eq 'txtpretoarial11'))
+            {
+                # A new result entry: record its URL
+                $self->{isBook} = 1;
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.fnac.pt" . $attr->{href};
+            }
+            elsif (($tagname eq 'strong') && ($self->{isBook}))
+            {
+                $self->{isTitle} = 1;
+            }
+            elsif (($tagname eq 'span') && ($self->{isBook}))
+            {
+                $self->{isAuthor} = 1;
+                $self->{isBook} = 0;
+            }
+        }
+        else
+        {
+            if (($tagname eq 'span') && ($attr->{class} eq 'txtpretoarial11'))
+            {
+                $self->{isAnalyse} = 1;
+            }
+            # A field label was recognised by text() (flag == 1): the next
+            # tag, whatever it is, moves the field to "value expected" (2).
+            elsif ($self->{isISBN} eq 1)
+            {
+                $self->{isISBN} = 2;
+            }
+            elsif ($self->{isPublisher} eq 1)
+            {
+                $self->{isPublisher} = 2;
+            }
+            elsif ($self->{isFormat} eq 1)
+            {
+                $self->{isFormat} = 2;
+            }
+            elsif ($self->{isSerie} eq 1)
+            {
+                $self->{isSerie} = 2;
+            }
+            elsif ($self->{isPublication} eq 1)
+            {
+                $self->{isPublication} = 2;
+            }
+            elsif ($self->{isPage} eq 1)
+            {
+                $self->{isPage} = 2;
+            }
+            elsif (($tagname eq 'a') && ($attr->{class} eq 'txt_arial14'))
+            {
+                $self->{isTitle} = 1;
+            }
+            elsif (($tagname eq 'strong') && ($self->{isTitle}))
+            {
+                $self->{isTitle} = 2;
+            }
+            elsif (($tagname eq 'a') && ($attr->{class} eq 'txt_arial10') && (index($attr->{href}, "param=autor") >= 0))
+            {
+                $self->{isAuthor} = 1;
+            }
+            elsif (($tagname eq 'td') && ($attr->{class} eq 'tabfundo_branco'))
+            {
+                $self->{isAnalyse} = 1;
+            }
+            elsif (($tagname eq 'img') && ($attr->{src} =~ m/Images\/catalogo\/livros/i))
+            {
+                $self->{curInfo}->{cover} = "http://www.fnac.pt" . $attr->{src};
+            }
+            elsif (($tagname eq 'td') && ($attr->{class} eq 'txtpretoarial11') && ($attr->{colspan} eq '2'))
+            {
+                $self->{isDescription} = 1;
+            }
+            elsif ($tagname eq 'object')
+            {
+                $self->{isDescription} = 1;
+            }
+            elsif ($tagname eq 'param')
+            {
+                $self->{isDescription} = 1;
+            }
+            elsif ($tagname eq 'embed')
+            {
+                $self->{isDescription} = 1;
+            }
+        }
+    }
+
+    # Closing-tag callback: clears isFound and updates the nesting counter.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        $self->{isFound} = 0;
+        $self->{inside}->{$tagname}--;
+    }
+
+    # Text callback: stores $origtext into the field selected by the flags
+    # raised in start().
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isTitle})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+                $self->{isTitle} = 0;
+            }
+            elsif ($self->{isAuthor})
+            {
+                foreach my $name (_formatAuthorNames($origtext))
+                {
+                    if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '')
+                    {
+                        $self->{itemsList}[$self->{itemIdx}]->{authors} = $name;
+                    }
+                    else
+                    {
+                        $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " . $name;
+                    }
+                }
+
+                $self->{isAuthor} = 0;
+            }
+        }
+        else
+        {
+            # Strip leading blanks
+            $origtext =~ s/^\s+//;
+            # Strip trailing blanks
+            $origtext =~ s/\s+$//g;
+            if ($self->{isTitle} eq '1')
+            {
+                $self->{curInfo}->{title} = $origtext;
+                $self->{isTitle} = 0;
+            }
+            elsif ($self->{isAuthor} eq 1)
+            {
+                if ($origtext ne '')
+                {
+                    foreach my $name (_formatAuthorNames($origtext))
+                    {
+                        if ($self->{curInfo}->{authors} eq '')
+                        {
+                            $self->{curInfo}->{authors} = $name;
+                        }
+                        else
+                        {
+                            # Always append: a name without a first name must
+                            # not replace the authors collected so far.
+                            $self->{curInfo}->{authors} .= ", " . $name;
+                        }
+                    }
+                }
+                $self->{isAuthor} = 0;
+            }
+            elsif ($self->{isAnalyse})
+            {
+                # The text is a field label: remember which value comes next
+                $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i);
+                $self->{isPublisher} = 1 if ($origtext =~ m/Editora/i);
+                $self->{isFormat} = 1 if ($origtext =~ m/Encaderna/i);
+                $self->{isSerie} = 1 if ($origtext =~ m/Colec/i);
+                $self->{isPublication} = 1 if ($origtext =~ m/Ano/i);
+                $self->{isPage} = 1 if ($origtext =~ m/pages/i);
+
+                $self->{isAnalyse} = 0;
+            }
+            elsif ($self->{isISBN} eq 2)
+            {
+                $self->{curInfo}->{isbn} = $origtext;
+                $self->{isISBN} = 0;
+            }
+            elsif ($self->{isPublisher} eq 2)
+            {
+                $self->{curInfo}->{publisher} = $origtext;
+                $self->{isPublisher} = 0;
+            }
+            elsif ($self->{isFormat} eq 2)
+            {
+                $self->{curInfo}->{format} = $origtext;
+                $self->{isFormat} = 0;
+            }
+            elsif ($self->{isSerie} eq 2)
+            {
+                $self->{curInfo}->{serie} = $origtext;
+                $self->{isSerie} = 0;
+            }
+            elsif ($self->{isPublication} eq 2)
+            {
+                $self->{curInfo}->{publication} = $origtext;
+                $self->{isPublication} = 0;
+            }
+            elsif (($self->{isPage} eq 2))
+            {
+                $self->{curInfo}->{pages} = $origtext;
+                $self->{isPage} = 0;
+            }
+            elsif ($self->{isDescription})
+            {
+                $self->{curInfo}->{description} .= $origtext;
+                $self->{isDescription} = 0;
+            }
+        }
+    }
+
+    # Constructor: declares the fields shown in the result list and resets
+    # every parsing flag.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+            publication => 0,
+            format => 0,
+            edition => 0,
+        };
+
+        $self->{isFound} = 0;
+        $self->{isBook} = 0;
+        $self->{isUrl} = 0;
+        $self->{isTitle} = 0;
+        $self->{isAuthor} = 0;
+        $self->{isAnalyse} = 0;
+        $self->{isPublisher} = 0;
+        $self->{isISBN} = 0;
+        $self->{isPublication} = 0;
+        $self->{isFormat} = 0;
+        $self->{isSerie} = 0;
+        $self->{isPage} = 0;
+        $self->{isDescription} = 0;
+
+        return $self;
+    }
+
+    # Prepares the raw page before parsing and returns the modified HTML.
+    # List mode: keeps only the part between the result-list marker and the
+    # cart-table marker. Item mode: removes or replaces inline markup so that
+    # a field value arrives as a single text chunk.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        if ($self->{parsingList})
+        {
+            my $listMarker = '"listagem de resultados"';
+            my $found = index($html, $listMarker);
+            if ($found >= 0)
+            {
+                $html = substr($html, $found + length($listMarker));
+            }
+
+            $found = index($html, '"tabela de estrutura do cart');
+            if ($found >= 0)
+            {
+                $html = substr($html, 0, $found);
+            }
+        }
+        else
+        {
+            $html =~ s|<u>||gi;
+            $html =~ s|<li>|\n* |gi;
+            $html =~ s|<br>|\n|gi;
+            $html =~ s|<br />|\n|gi;
+            $html =~ s|<b>||gi;
+            $html =~ s|</b>||gi;
+            $html =~ s|<i>||gi;
+            $html =~ s|</i>||gi;
+            $html =~ s|<p>|\n|gi;
+            $html =~ s|</p>||gi;
+            $html =~ s|</h4>||gi;
+            $html =~ s|\x{92}|'|g;
+            $html =~ s|’|'|gi;
+            $html =~ s|•|*|gi;
+            $html =~ s|<center>||gi;
+            $html =~ s|</center>||gi;
+            $html =~ s|</embed>||gi;
+            $html =~ s|</object>||gi;
+        }
+
+        return $html;
+    }
+
+    # Returns the search URL for $word.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        return "http://www.fnac.pt/pt/Search/Search.aspx?categoryN=&cIndex=&catalog=livros&str=". $word;
+    }
+
+    # Returns $url when set, otherwise the site home page.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url if $url;
+        return 'http://www.fnac.pt/';
+    }
+
+    sub getName
+    {
+        return "Fnac (PT)";
+    }
+
+    sub getCharset
+    {
+        my $self = shift;
+        return "UTF-8";
+    }
+
+    sub getAuthor
+    {
+        return 'TPF';
+    }
+
+    sub getLang
+    {
+        return 'PT';
+    }
+
+    sub getSearchFieldsArray
+    {
+        return ['title'];
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCISBNdb.pm b/lib/gcstar/GCPlugins/GCbooks/GCISBNdb.pm
new file mode 100644
index 0000000..2b7256c
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCISBNdb.pm
@@ -0,0 +1,370 @@
+package GCPlugins::GCbooks::GCISBNdb;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+    # Book plugin for isbndb.com.
+    #
+    # start() raises flags according to the markup, text() stores the next
+    # text chunk into the matching field. $self->{parsingList} selects
+    # between the search-result page (itemsList) and the item page (curInfo).
+    package GCPlugins::GCbooks::GCPluginISBNdb;
+
+    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+    use URI::Escape;
+
+    # Opening-tag callback.
+    #   $tagname - name of the tag
+    #   $attr    - hash ref of the tag attributes
+    # ($attrseq and $origtext are accepted but not used here.)
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            if (($tagname eq 'div') && ($attr->{class} eq 'bookInfo') && ($self->{searchField} ne 'isbn'))
+            {
+                $self->{isBook} = 1;
+            }
+            elsif (($tagname eq 'a') && (index($attr->{href}, "/d/book/") >= 0) && ($self->{isBook}))
+            {
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://isbndb.com" . $attr->{href};
+                $self->{isTitle} = 1;
+            }
+            elsif (($tagname eq 'a') && (index($attr->{href}, "/d/person/") >= 0) && ($self->{isBook}))
+            {
+                $self->{isAuthor} = 1;
+            }
+            elsif (($tagname eq 'a') && (index($attr->{href}, "/d/publisher/") >= 0) && ($self->{isBook}))
+            {
+                $self->{isPublisher} = 1;
+            }
+            elsif (($tagname eq 'a') && (index($attr->{onclick}, "isbndbTrackBuy") >= 0) && ($self->{itemIdx} eq '-1'))
+            {
+                # No result entry was recorded so far: register the loaded
+                # page itself as the only item.
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl};
+            }
+            elsif (($tagname eq 'span') && ($attr->{class} eq 'inactive'))
+            {
+                $self->{isBook} = 0;
+            }
+        }
+        else
+        {
+            if ($tagname eq 'title')
+            {
+                $self->{isTitle} = 1;
+            }
+            elsif (($tagname eq 'a') && (index($attr->{href}, "/d/person/") >= 0))
+            {
+                $self->{isAuthor} = 1;
+            }
+            elsif (($tagname eq 'a') && (index($attr->{href}, "/d/publisher/") >= 0))
+            {
+                $self->{isPublisher} = 1;
+            }
+            elsif (($tagname eq 'a') && (index($attr->{href}, "/c/Library_Shelves/Dewey") >= 0))
+            {
+                $self->{isGenre} = 1;
+            }
+            elsif ($tagname eq 'h2')
+            {
+                $self->{isAnalyse} = 1;
+            }
+            elsif (($tagname eq 'iframe') && ($self->{curInfo}->{cover} eq ''))
+            {
+                # The cover is the first image of the page embedded in the
+                # iframe: load that page and take the image source.
+                my $html = $self->loadPage($attr->{src}, 0, 1);
+                my $imgMarker = '<img src="';
+                my $found = index($html, $imgMarker);
+                if ($found >= 0)
+                {
+                    $html = substr($html, $found + length($imgMarker));
+
+                    my @array = split(/"/, $html);
+                    $self->{curInfo}->{cover} = $array[0];
+                }
+            }
+        }
+    }
+
+    # Closing-tag callback: updates the nesting counter.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        $self->{inside}->{$tagname}--;
+    }
+
+    # Text callback: stores $origtext into the field selected by the flags
+    # raised in start().
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isTitle})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+                $self->{isTitle} = 0;
+            }
+            elsif ($self->{isAuthor})
+            {
+                if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '')
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext;
+                }
+                else
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', ';
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext;
+                }
+                $self->{isAuthor} = 0;
+            }
+            elsif ($self->{isPublisher})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+                $self->{isPublisher} = 0;
+            }
+        }
+        else
+        {
+            # Strip leading blanks
+            $origtext =~ s/^\s+//;
+            # Strip trailing blanks
+            $origtext =~ s/\s+$//g;
+            if ($self->{isTitle})
+            {
+                $self->{curInfo}->{title} = $origtext;
+                $self->{isTitle} = 0;
+            }
+            elsif ($self->{isAuthor})
+            {
+                $self->{curInfo}->{authors} .= $origtext;
+                $self->{curInfo}->{authors} .= ",";
+                $self->{isAuthor} = 0;
+            }
+            elsif ($self->{isPublisher})
+            {
+                $self->{curInfo}->{publisher} = $origtext;
+                $self->{isPublisher} = 0;
+            }
+            elsif ($self->{isAnalyse})
+            {
+                # The text is a section heading: remember what comes next
+                $self->{isFormat} = 1 if ($origtext =~ m/Book Details:/i);
+                $self->{isDescription} = 1 if ($origtext =~ m/Notes:/i);
+                $self->{isDescription} = 1 if ($origtext =~ m/Summary:/i);
+
+                $self->{isAnalyse} = 0;
+            }
+            elsif ($self->{isFormat})
+            {
+                # "Book Details" block: one "Label: value" pair per line
+                foreach my $element (split(/\n/, $origtext))
+                {
+                    my @array2 = split(/:/, $element);
+                    # Strip leading blanks
+                    $array2[1] =~ s/^\s+//;
+                    # Strip trailing blanks
+                    $array2[1] =~ s/\s+$//g;
+                    if ($array2[0] =~ m/Language/i)
+                    {
+                        $self->{curInfo}->{language} = $array2[1];
+                    }
+                    elsif ($array2[0] =~ m/Physical Description/i)
+                    {
+                        foreach my $element2 (split(/;/, $array2[1]))
+                        {
+                            # Strip leading blanks
+                            $element2 =~ s/^\s+//;
+                            # Matched against the lexical variable: the
+                            # global $_ belongs to the caller.
+                            if ($element2 =~ /(^[0-9]+)(\s[p])(.*)/)
+                            {
+                                $self->{curInfo}->{pages} = $1;
+                            }
+                            elsif ($element2 =~ /(.*)(\s)([0-9]+)(\s[p])(.*)/)
+                            {
+                                $self->{curInfo}->{pages} = $3;
+                            }
+                        }
+                    }
+                    elsif ($array2[0] =~ m/Edition Info/i)
+                    {
+                        my @array3 = split(/;/, $array2[1]);
+                        $self->{curInfo}->{format} = $array3[0];
+                        # Keep the second part only when it holds a year
+                        if ($array3[1] =~ /(.*)([0-9][0-9][0-9][0-9])(.*)/)
+                        {
+                            $self->{curInfo}->{publication} = $array3[1];
+                            # Strip leading blanks
+                            $self->{curInfo}->{publication} =~ s/^\s+//;
+                        }
+                    }
+                }
+                $self->{isFormat} = 0;
+            }
+            elsif ($self->{isDescription})
+            {
+                $origtext =~ s/\n\n/\n/g;
+                $self->{curInfo}->{description} = $origtext;
+                $self->{isDescription} = 0;
+            }
+            elsif ($self->{isGenre})
+            {
+                my @array = split(/--/, $origtext);
+
+                $self->{curInfo}->{genre} = $array[1];
+                # Strip leading blanks
+                $self->{curInfo}->{genre} =~ s/^\s+//;
+                $self->{isGenre} = 0;
+            }
+            elsif (($origtext =~ m/ISBN:/i) && ($self->{curInfo}->{isbn} eq ''))
+            {
+                my @array = split(/:/, $origtext);
+
+                # Strip leading blanks
+                $array[1] =~ s/^\s+//;
+                # Strip trailing blanks
+                $array[1] =~ s/\s+$//g;
+                my @array2 = split(/ /, $array[1]);
+
+                $self->{curInfo}->{isbn} = $array2[0];
+                # Strip leading blanks
+                $self->{curInfo}->{isbn} =~ s/^\s+//;
+                # Strip trailing blanks
+                $self->{curInfo}->{isbn} =~ s/\s+$//g;
+            }
+        }
+    }
+
+    # Constructor: declares the fields shown in the result list and resets
+    # every parsing flag.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+            publication => 0,
+            format => 0,
+            edition => 1,
+            serie => 0,
+        };
+
+        $self->{isBook} = 0;
+        $self->{isTitle} = 0;
+        $self->{isAuthor} = 0;
+        $self->{isPublisher} = 0;
+        $self->{isFormat} = 0;
+        $self->{isAnalyse} = 0;
+        $self->{isDescription} = 0;
+        $self->{isGenre} = 0;
+
+        return $self;
+    }
+
+    # Prepares the raw page before parsing and returns the modified HTML:
+    # inline markup is removed or replaced so that a field value arrives as
+    # a single text chunk.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        if ($self->{parsingList})
+        {
+            $html =~ s|<b>||gi;
+            $html =~ s|</b>||gi;
+        }
+        else
+        {
+            $html =~ s|<u>||gi;
+            $html =~ s|<li>|\n* |gi;
+            $html =~ s|<br>|\n|gi;
+            $html =~ s|<br />|\n|gi;
+            $html =~ s|<b>||gi;
+            $html =~ s|</b>||gi;
+            $html =~ s|<i>||gi;
+            $html =~ s|</i>||gi;
+            $html =~ s|<p>|\n|gi;
+            $html =~ s|</p>||gi;
+            $html =~ s|\x{92}|'|g;
+            $html =~ s|’|'|gi;
+            $html =~ s|•|*|gi;
+            $html =~ s|…|...|gi;
+            $html =~ s|\x{8C}|OE|gi;
+            $html =~ s|\x{9C}|oe|gi;
+        }
+
+        return $html;
+    }
+
+    # Returns the search URL for $word; the page used depends on whether the
+    # search is done by ISBN or by title.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        if ($self->{searchField} eq 'isbn')
+        {
+            return "http://isbndb.com/search-all.html?kw=" .$word;
+        }
+        else
+        {
+            return "http://isbndb.com/search-title.html?kw=" .$word ."&isn=";
+        }
+    }
+
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url;
+    }
+
+    sub getName
+    {
+        return "ISBNdb";
+    }
+
+    sub getCharset
+    {
+        my $self = shift;
+        return "ISO-8859-15";
+    }
+
+    sub getAuthor
+    {
+        return 'TPF';
+    }
+
+    sub getLang
+    {
+        return 'EN';
+    }
+
+    sub getSearchFieldsArray
+    {
+        return ['isbn', 'title'];
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCInternetBokHandeln.pm b/lib/gcstar/GCPlugins/GCbooks/GCInternetBokHandeln.pm
new file mode 100644
index 0000000..3b553e9
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCInternetBokHandeln.pm
@@ -0,0 +1,464 @@
+package GCPlugins::GCbooks::GCbooksInternetBokHandeln;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+    # Book plugin for www.internetbokhandeln.se.
+    #
+    # start() raises flags according to the markup, text() stores the next
+    # text chunk into the matching field. $self->{parsingList} selects
+    # between the search-result page (itemsList) and the item page (curInfo).
+    package GCPlugins::GCbooks::GCPluginInternetBokHandeln;
+
+    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+    use URI::Escape;
+
+    # Returns $text without its leading and trailing blanks
+    # (an undefined value yields the empty string).
+    sub _trim
+    {
+        my ($text) = @_;
+
+        $text = '' if !defined $text;
+        $text =~ s/^\s+//;
+        $text =~ s/\s+$//;
+        return $text;
+    }
+
+    # Splits an author string such as "Last, First; Last2, First2" on ';'
+    # and returns the list of names rewritten as "First Last". A name
+    # without a first name is returned as is; empty entries are skipped.
+    sub _formatAuthorNames
+    {
+        my ($text) = @_;
+
+        my @names;
+        foreach my $entry (split(/;/, $text))
+        {
+            my ($last, $first) = split(/,/, $entry);
+            my $name = join(' ', grep { $_ ne '' } (_trim($first), _trim($last)));
+            push @names, $name if $name ne '';
+        }
+
+        return @names;
+    }
+
+    # Opening-tag callback.
+    #   $tagname - name of the tag
+    #   $attr    - hash ref of the tag attributes
+    # ($attrseq and $origtext are accepted but not used here.)
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            if (($tagname eq 'span') && ($attr->{class} eq 'title1'))
+            {
+                # The loaded page is an item page: register it as an item
+                $self->{isFound} = 1;
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl};
+            }
+            elsif (($tagname eq 'td') && ($attr->{rowspan} eq '4') && ($self->{isBook} eq '0') && ($self->{isFound} eq 0))
+            {
+                # The sequence is a bit twisted: we look for the second
+                # pass through the rowspan/a sequence
+                $self->{isBook} = 1;
+                $self->{isUrl} = 1;
+            }
+            elsif (($tagname eq 'img') && ($self->{isBook} eq '1') && ($self->{isUrl}))
+            {
+                $self->{isBook} = 2;
+            }
+            elsif (($tagname eq 'a') && ($self->{isBook} eq '2') && ($self->{isUrl}))
+            {
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.internetbokhandeln.se" . $attr->{href};
+                $self->{isUrl} = 0;
+                $self->{isTitle} = 1;
+            }
+            elsif (($tagname eq 'span') && ($attr->{class} eq 'author') && ($self->{isFound} eq 0))
+            {
+                $self->{isAuthor} = 1;
+                $self->{isBook} = 0;
+            }
+            elsif (($tagname eq 'span') && ($attr->{class} eq 'shaded') && ($self->{isFound} eq 0))
+            {
+                $self->{isEditor_Publication_Format_Lang} = 1;
+                $self->{isBook} = 0;
+            }
+        }
+        else
+        {
+            # A field label was recognised by text() (flag == 1): the next
+            # tag, whatever it is, moves the field to "value expected" (2).
+            if ($self->{isAuthor} eq 1)
+            {
+                $self->{isAuthor} = 2;
+            }
+            elsif ($self->{isPublisher} eq 1)
+            {
+                $self->{isPublisher} = 2;
+            }
+            elsif ($self->{isISBN} eq 1)
+            {
+                $self->{isISBN} = 2;
+            }
+            elsif ($self->{isFormat} eq 1)
+            {
+                $self->{isFormat} = 2;
+            }
+            elsif ($self->{isEdition} eq 1)
+            {
+                $self->{isEdition} = 2;
+            }
+            elsif ($self->{isPage} eq 1)
+            {
+                $self->{isPage} = 2;
+            }
+            elsif ($self->{isLanguage} eq 1)
+            {
+                $self->{isLanguage} = 2;
+            }
+            elsif ($self->{isPublication} eq 1)
+            {
+                $self->{isPublication} = 2;
+            }
+            elsif ($self->{isSerie} eq 1)
+            {
+                $self->{isSerie} = 2;
+            }
+            elsif (($tagname eq 'span') && ($attr->{class} eq 'title1'))
+            {
+                $self->{isTitle} = 1;
+                # Reset the flag (otherwise it is not reinitialised from
+                # one record to the next)
+                $self->{isDescription} = 0;
+            }
+            elsif (($tagname eq 'span') && ($attr->{class} eq 'font5'))
+            {
+                $self->{isAnalyse} = 1;
+            }
+            elsif (($tagname eq 'p') && ($self->{curInfo}->{isbn} ne '') && ($self->{curInfo}->{description} eq '') && ($self->{isDescription} ne 2))
+            {
+                $self->{isDescription} = 1;
+            }
+            elsif (($tagname eq 'div') && ($attr->{id} eq 'largebook'))
+            {
+                # Make sure that, when there is no description, we do not
+                # pick up arbitrary text
+                $self->{isDescription} = 2;
+            }
+            elsif (($tagname eq 'td') && ($attr->{class} eq 'pricecolumn'))
+            {
+                $self->{isCover} = 1;
+            }
+            elsif (($tagname eq 'img') && ($self->{isCover} eq 1))
+            {
+                # Determine whether there is a cover; if there is one, the
+                # large version, which appears much later, is fetched
+                if ($attr->{onclick} eq 'return showBig();')
+                {
+                    $self->{isCover} = 2;
+                }
+                else
+                {
+                    if ($attr->{src} eq '/i/dummy.gif')
+                    {
+                        # There is no picture
+                        $self->{isCover} = 3;
+                    }
+                    else
+                    {
+                        $self->{curInfo}->{cover} = $attr->{src};
+                        $self->{isCover} = 3;
+                    }
+                }
+            }
+            elsif (($tagname eq 'img') && ($attr->{onclick} eq 'return hideBig();') && ($self->{isCover} eq 2))
+            {
+                $self->{curInfo}->{cover} = $attr->{src};
+                $self->{isCover} = 3;
+            }
+        }
+    }
+
+    # Closing-tag callback: clears isFound and updates the nesting counter.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        $self->{isFound} = 0;
+        $self->{inside}->{$tagname}--;
+    }
+
+    # Text callback: stores $origtext into the field selected by the flags
+    # raised in start().
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isTitle})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+                $self->{isTitle} = 0;
+            }
+            elsif ($self->{isAuthor})
+            {
+                foreach my $name (_formatAuthorNames($origtext))
+                {
+                    if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '')
+                    {
+                        $self->{itemsList}[$self->{itemIdx}]->{authors} = $name;
+                    }
+                    else
+                    {
+                        $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " . $name;
+                    }
+                }
+
+                $self->{isAuthor} = 0;
+            }
+            elsif ($self->{isEditor_Publication_Format_Lang})
+            {
+                # '|'-separated line; the second and third parts are used
+                # as publication date and format
+                my @Editor_Publication_Format_Lang = split(/\|/, $origtext);
+
+                $self->{itemsList}[$self->{itemIdx}]->{publication} = _trim($Editor_Publication_Format_Lang[1]);
+                $self->{itemsList}[$self->{itemIdx}]->{format} = _trim($Editor_Publication_Format_Lang[2]);
+
+                $self->{isEditor_Publication_Format_Lang} = 0;
+            }
+        }
+        else
+        {
+            # Strip leading blanks
+            $origtext =~ s/^\s+//;
+            if ($self->{isTitle})
+            {
+                $self->{curInfo}->{title} = $origtext;
+                $self->{isTitle} = 0;
+            }
+            elsif ($self->{isAnalyse})
+            {
+                # The text is a field label: remember which value comes next
+                $self->{isAuthor} = 1 if ($origtext =~ m/F.*rfattare/i);
+                $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i);
+                $self->{isPublisher} = 1 if ($origtext =~ m/F.*rlag/i);
+                $self->{isFormat} = 1 if ($origtext =~ m/Band/i);
+                $self->{isEdition} = 1 if ($origtext =~ m/Upplagenr/i);
+                $self->{isPage} = 1 if ($origtext =~ m/Sidor/i);
+                $self->{isLanguage} = 1 if ($origtext =~ m/Spr.*k/i);
+                $self->{isPublication} = 1 if ($origtext =~ m/Utgivning/i);
+                $self->{isSerie} = 1 if ($origtext =~ m/Serie/i);
+
+                $self->{isAnalyse} = 0;
+            }
+            elsif ($self->{isAuthor} eq 2)
+            {
+                # Each name is followed by a comma, as in the other fields
+                # of curInfo built by this plugin
+                foreach my $name (_formatAuthorNames($origtext))
+                {
+                    $self->{curInfo}->{authors} .= $name;
+                    $self->{curInfo}->{authors} .= ",";
+                }
+
+                $self->{isAuthor} = 0;
+            }
+            elsif ($self->{isISBN} eq 2)
+            {
+                # The site shows two ISBNs; only the first one is kept
+                if ($self->{curInfo}->{isbn} eq '')
+                {
+                    $self->{curInfo}->{isbn} = $origtext;
+                }
+                $self->{isISBN} = 0;
+            }
+            elsif ($self->{isPublisher} eq 2)
+            {
+                $self->{curInfo}->{publisher} = $origtext;
+                $self->{isPublisher} = 0;
+            }
+            elsif ($self->{isFormat} eq 2)
+            {
+                my @array = split(/\n/, $origtext);
+
+                $self->{curInfo}->{format} = $array[0];
+                $self->{isFormat} = 0;
+            }
+            elsif ($self->{isEdition} eq 2)
+            {
+                # There is some trouble on the site with this field : it is not accurate. For example for ISBN 9113014528
+                # this field is set to 7000. So for now this field isn't taken.
+#                $self->{curInfo}->{edition} = $origtext;
+                $self->{isEdition} = 0;
+            }
+            elsif ($self->{isPage} eq 2)
+            {
+                $self->{curInfo}->{pages} = $origtext;
+                $self->{isPage} = 0;
+            }
+            elsif ($self->{isLanguage} eq 2)
+            {
+                $self->{curInfo}->{language} = $origtext;
+                $self->{isLanguage} = 0;
+            }
+            elsif ($self->{isPublication} eq 2)
+            {
+                # Rewrite "day Month year" or "Month year" with the month
+                # number taken from $self->{monthNumber}
+                $self->{curInfo}->{publication} = $origtext;
+                $self->{curInfo}->{publication} =~ s|([0-9]*) ([A-Za-z]*) ([0-9]*)|$1.'/'.$self->{monthNumber}->{$2}.'/'.$3|e;
+                $self->{curInfo}->{publication} =~ s|([A-Za-z]*) ([0-9]*)|$self->{monthNumber}->{$1}.'/'.$2|e;
+                $self->{isPublication} = 0;
+            }
+            elsif ($self->{isSerie} eq 2)
+            {
+                $self->{curInfo}->{serie} = $origtext;
+                $self->{isSerie} = 0;
+            }
+            elsif ($self->{isDescription} eq 1)
+            {
+                $self->{curInfo}->{description} = $origtext;
+                $self->{isDescription} = 2;
+            }
+        }
+    }
+
+    # Constructor: sets up the Swedish month table, declares the fields
+    # shown in the result list and resets every parsing flag.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{monthNumber} = {
+            Januari => '01',
+            Februari => '02',
+            Mars => '03',
+            April => '04',
+            Maj => '05',
+            Juni => '06',
+            Juli => '07',
+            Augusti => '08',
+            September => '09',
+            Oktober => '10',
+            November => '11',
+            December => '12'
+        };
+
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+            publication => 1,
+            format => 1,
+            edition => 0,
+        };
+
+        $self->{isBook} = 0;
+        $self->{isUrl} = 0;
+        $self->{isEditor_Publication_Format_Lang} = 0;
+        $self->{isAnalyse} = 0;
+        $self->{isFound} = 0;
+        $self->{isTitle} = 0;
+        $self->{isAuthor} = 0;
+        $self->{isPublisher} = 0;
+        $self->{isISBN} = 0;
+        $self->{isFormat} = 0;
+        $self->{isEdition} = 0;
+        $self->{isPage} = 0;
+        $self->{isLanguage} = 0;
+        $self->{isPublication} = 0;
+        $self->{isSerie} = 0;
+        $self->{isDescription} = 0;
+        $self->{isCover} = 0;
+
+        return $self;
+    }
+
+    # Prepares the raw page before parsing and returns the modified HTML:
+    # inline markup is removed or replaced so that a field value arrives as
+    # a single text chunk.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        if ($self->{parsingList})
+        {
+            $html =~ s|<b>||gi;
+            $html =~ s|</b>||gi;
+        }
+        else
+        {
+            $html =~ s|<li>|\n* |gi;
+            $html =~ s|<br>|\n|gi;
+            $html =~ s|<br />|\n|gi;
+            $html =~ s|<b>||gi;
+            $html =~ s|</b>||gi;
+            $html =~ s|<i>||gi;
+            $html =~ s|</i>||gi;
+        }
+
+        return $html;
+    }
+
+    # Returns the search URL for $word.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        return "http://www.internetbokhandeln.se/results.html?new_search=1&all_search=" . $word. "&search_media=all";
+    }
+
+    # Returns $url when set, otherwise the site home page.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url if $url;
+        return 'http://www.internetbokhandeln.se/';
+    }
+
+    sub getName
+    {
+        return "InternetBokHandeln";
+    }
+
+    sub getAuthor
+    {
+        return 'TPF';
+    }
+
+    sub getLang
+    {
+        return 'SV';
+    }
+
+    sub getSearchFieldsArray
+    {
+        return ['isbn', 'title'];
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCInternetBookShop.pm b/lib/gcstar/GCPlugins/GCbooks/GCInternetBookShop.pm
new file mode 100644
index 0000000..713646c
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCInternetBookShop.pm
@@ -0,0 +1,376 @@
+package GCPlugins::GCbooks::GCInternetBookShop;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginInternetBookShop;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+
+    # Opening-tag callback.
+    #   $tagname - name of the tag
+    #   $attr    - hash ref of the tag attributes
+    # ($attrseq and $origtext are accepted but not used here.)
+    #
+    # List mode ($self->{parsingList} true): $self->{bookStep} follows the
+    # tag sequence td (1) -> img (2) -> a (3) -> br (4) -> i of one result
+    # entry; the item is recorded when the final 'i' is reached. Any tag
+    # other than those and 'b' resets the whole state.
+    #
+    # Item mode: captures the ISBN from the 'isbn' input, the cover from the
+    # image whose source contains that ISBN, and flags author links while
+    # bookStep is 1.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            if ($tagname eq 'td')
+            {
+                $self->{bookStep} = 1 if ($self->{bookStep} == 0);
+            }
+            elsif ($tagname eq 'img')
+            {
+                $self->{bookStep} = 2 if ($self->{bookStep} == 1);
+            }
+            elsif ($tagname eq 'a')
+            {
+                if ($self->{bookStep} == 2)
+                {
+                    $self->{url} = $attr->{href};
+                    $self->{bookStep} = 3;
+                    $self->{isTitle} = 1;
+                }
+            }
+            elsif (($tagname eq 'br') && ($self->{bookStep} == 3))
+            {
+                $self->{bookStep} = 4;
+                $self->{isAuthor} = 1;
+            }
+            elsif (($tagname eq 'i') && ($self->{bookStep} == 4))
+            {
+                # End of the entry: store what was collected so far
+                $self->{isBook} = 1;
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{url};
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{title};
+
+                if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '')
+                {
+                    # Only the part before the first comma is kept as author
+                    my @fields = split /,/, $self->{authorAndYear};
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $fields[0];
+                }
+                $self->{isPublisher} = 1;
+            }
+            elsif ($tagname ne 'b')
+            {
+                # Unexpected tag: forget the entry being recognised
+                $self->{bookStep} = 0;
+                $self->{url} = '';
+                $self->{isBook} = 0;
+                $self->{isUrl} = 0;
+                $self->{isTitle} = 0;
+                $self->{isAuthor} = 0;
+                $self->{isPublisher} = 0;
+                $self->{isPage} = 0;
+                $self->{isSerie} = 0;
+                $self->{isTranslator} = 0;
+                $self->{isDescription} = 0;
+            }
+        }
+        else
+        {
+            my $isbn = $self->{curInfo}->{isbn};
+
+            if (($tagname eq 'input') && ($attr->{name} eq 'isbn') && ($isbn eq ''))
+            {
+                $self->{curInfo}->{isbn} = $attr->{value};
+            }
+            # The ISBN is matched literally (\Q..\E) and only once it is
+            # known: an empty interpolated pattern would make Perl reuse the
+            # last successfully matched pattern instead.
+            elsif (($tagname eq 'img') && ($isbn ne '') && ($attr->{src} =~ m/\Q$isbn\E/i) && ($attr->{src} =~ m/cop/i))
+            {
+                $self->{curInfo}->{cover} = $attr->{src};
+            }
+            elsif ($self->{bookStep} == 1)
+            {
+                if (($tagname eq 'a') && ($self->{areAuthors} == 0))
+                {
+                    $self->{isAuthor} = 1;
+                    $self->{areAuthors} = 1;
+                }
+                if ($self->{areAuthors} == 1)
+                {
+                    if ($tagname eq 'a')
+                    {
+                        $self->{isAuthor} = 1;
+                    }
+                    else
+                    {
+                        # First non-link tag after the authors: list is over
+                        $self->{bookStep} = 2;
+                        $self->{areAuthors} = 0;
+                    }
+                }
+            }
+        }
+    }
+
+    # End-tag callback: undo the nesting count that start() added for this tag.
+    sub end
+    {
+        my $self = shift;
+        my $tagname = shift;
+
+        # One level of $tagname has just been closed.
+        $self->{inside}{$tagname}--;
+    }
+
+    # Text-node callback.
+    #
+    # Parameters:
+    #   $origtext - the text found between two tags
+    #
+    # List mode: the flags raised by start() decide where the text goes
+    # (pending title, pending "author, year" line, or the edition of the item
+    # that was just stored).  Each flag is consumed by the first text node.
+    #
+    # Item mode: the text is trimmed first.  A known Italian label ('Titolo',
+    # 'Autore', 'Dati', ...) only arms the matching flag; any other text is
+    # stored in the field whose flag is currently armed.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+        if ($self->{parsingList})
+        {
+            if ($self->{isTitle})
+            {
+                $self->{title} = $origtext;
+                $self->{isTitle} = 0;
+            }
+            elsif ($self->{isAuthor})
+            {
+                $self->{authorAndYear} = $origtext;
+                $self->{isAuthor} = 0;
+            }
+            elsif ($self->{isPublisher})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+                $self->{isPublisher} = 0;
+            }
+        }
+        else
+        {
+            # Strip leading whitespace
+            $origtext =~ s/^\s+//;
+            # Strip trailing whitespace
+            $origtext =~ s/\s+$//g;
+            if ($origtext eq 'Titolo')
+            {
+                $self->{isTitle} = 1;
+            }
+            elsif ($origtext eq 'Autore')
+            {
+                # start() turns the following <a> tags into author flags.
+                $self->{bookStep} = 1;
+            }
+            elsif ($origtext eq 'Dati')
+            {
+                $self->{isPage} = 1;
+            }
+            elsif ($origtext eq 'Editore')
+            {
+                $self->{isPublisher} = 1;
+            }
+            elsif ($origtext eq 'Traduttore')
+            {
+                $self->{isTranslator} = 1;
+            }
+            elsif ($origtext eq '(collana')
+            {
+                $self->{isSerie} = 1;
+            }
+            elsif ($origtext eq 'Descrizione')
+            {
+                $self->{isDescription} = 1;
+            }
+            else
+            {
+                if ($self->{isTitle})
+                {
+                    $self->{curInfo}->{title} = $origtext;
+                    $self->{isTitle} = 0;
+                }
+                elsif ($self->{isAuthor})
+                {
+                    # Several author links are joined with ", ".
+                    if ($self->{curInfo}->{authors} eq '')
+                    {
+                        $self->{curInfo}->{authors} = $origtext;
+                    }
+                    else
+                    {
+                        $self->{curInfo}->{authors} .= ", " . $origtext;
+                    }
+                    $self->{isAuthor} = 0;
+                }
+                elsif ($self->{isPage})
+                {
+                    # The 'Dati' line is comma separated:
+                    # publication, pages, format[, more format]
+                    my @array = split(/,/,$origtext);
+
+                    $self->{curInfo}->{publication} = $array[0];
+                    $self->{curInfo}->{pages} = $array[1];
+                    # Strip leading whitespace
+                    $self->{curInfo}->{pages} =~ s/^\s+//;
+                    # Drop the literal "p." marker.  The dot is escaped so
+                    # that a "p" followed by any other character is kept.
+                    $self->{curInfo}->{pages} =~ s/p\.//;
+                    # Strip the whitespace that preceded the marker
+                    $self->{curInfo}->{pages} =~ s/\s+$//;
+                    if ($array[3] ne '')
+                    {
+                        $self->{curInfo}->{format} = $array[2] . "," .$array[3];
+                    }
+                    else
+                    {
+                        $self->{curInfo}->{format} = $array[2];
+                    }
+                    # Strip leading whitespace
+                    $self->{curInfo}->{format} =~ s/^\s+//;
+
+                    $self->{isPage} = 0;
+                }
+                elsif ($self->{isPublisher})
+                {
+                    $self->{curInfo}->{publisher} = $origtext;
+                    $self->{isPublisher} = 0;
+                }
+                elsif ($self->{isTranslator})
+                {
+                    $self->{curInfo}->{translator} = $origtext;
+                    $self->{isTranslator} = 0;
+                }
+                elsif ($self->{isDescription})
+                {
+                    $self->{curInfo}->{description} .= $origtext;
+                    $self->{isDescription} = 0;
+                }
+                elsif ($self->{isSerie})
+                {
+                    $self->{curInfo}->{serie} = $origtext;
+                    $self->{isSerie} = 0;
+                }
+            }
+        }
+    }
+
+    # Constructor: builds the object through the parent class, re-blesses it
+    # into the requested class and puts the parser state in its initial
+    # condition.
+    sub new
+    {
+        my $invocant = shift;
+        my $class = ref($invocant) || $invocant;
+        my $self = $class->SUPER::new();
+        bless($self, $class);
+
+        # Per-field switches stored in hasField (title, authors and edition
+        # are on; publication and format are off).
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+            publication => 0,
+            format => 0,
+            edition => 1,
+        };
+
+        # Parser flags and the list-mode step counter all start at zero.
+        for my $flag (qw(
+            isBook isUrl isTitle isAuthor isPublisher isPage
+            isSerie isTranslator isDescription areAuthors bookStep
+        ))
+        {
+            $self->{$flag} = 0;
+        }
+
+        # Scratch values collected for the result row being parsed.
+        for my $scratch (qw(url authorAndYear title))
+        {
+            $self->{$scratch} = '';
+        }
+
+        return $self;
+    }
+
+    # Rewrites the raw HTML before it is handed to the parser and returns the
+    # result.
+    #
+    # List mode: "<br><i>" is collapsed to "<i>".
+    # Item mode: everything from the '<a name="commenti">' anchor onwards is
+    # dropped, then a fixed series of tags and characters is removed or
+    # replaced by plain text.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        if ($self->{parsingList})
+        {
+            $html =~ s{<br><i>}{<i>}gi;
+            return $html;
+        }
+
+        my $cutAt = index($html, '<a name="commenti">');
+        $html = substr($html, 0, $cutAt) if $cutAt >= 0;
+
+        # Applied strictly in this order: each rule sees the output of the
+        # previous ones.
+        my @rules = (
+            [ qr{<u>}i,       ''     ],
+            [ qr{<li>}i,      "\n* " ],
+            [ qr{<br>}i,      "\n"   ],
+            [ qr{<br />}i,    "\n"   ],
+            [ qr{<b>}i,       ''     ],
+            [ qr{</b>}i,      ''     ],
+            [ qr{<i>}i,       ''     ],
+            [ qr{</i>}i,      ''     ],
+            [ qr{<p>}i,       "\n"   ],
+            [ qr{</p>}i,      ''     ],
+            [ qr{</h4>}i,     ''     ],
+            [ qr{\x{92}},     "'"    ],
+            [ qr{’}i,         "'"    ],
+            [ qr{•}i,         '*'    ],
+            [ qr{<center>}i,  ''     ],
+            [ qr{</center>}i, ''     ],
+            [ qr{</embed>}i,  ''     ],
+            [ qr{</object>}i, ''     ],
+        );
+        for my $rule (@rules)
+        {
+            my ($pattern, $replacement) = @{$rule};
+            $html =~ s/$pattern/$replacement/g;
+        }
+
+        return $html;
+    }
+
+    # Returns the keyword-search URL with $word appended as the value of the
+    # "x" parameter.  $word is appended as given, with no escaping here.
+    sub getSearchUrl
+    {
+        my $self = shift;
+        my $word = shift;
+
+        my $endpoint = 'http://www.internetbookshop.it/ser/serpge.asp?type=keyword&x=';
+        return $endpoint . $word;
+    }
+
+    # Returns $url unchanged when it is a true value, otherwise the site's
+    # home page.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url ? $url : 'http://www.internetbookshop.it/';
+    }
+
+    # Name of this plugin (constant string).
+    sub getName
+    {
+        my $pluginName = 'InternetBookShop';
+        return $pluginName;
+    }
+
+    # Character set name reported by this plugin; presumably the encoding of
+    # the pages served by the site (not verifiable from this file).
+    sub getCharset
+    {
+        my ($self) = @_;
+        my $charset = 'ISO-8859-1';
+        return $charset;
+    }
+
+    # Author tag of this plugin (constant string).
+    sub getAuthor
+    {
+        my $pluginAuthor = 'TPF';
+        return $pluginAuthor;
+    }
+
+    # Language code of this plugin (constant string).
+    sub getLang
+    {
+        my $langCode = 'IT';
+        return $langCode;
+    }
+
+    # Returns a fresh array reference listing the search fields of this
+    # plugin; only 'title' is offered.
+    sub getSearchFieldsArray
+    {
+        my @searchFields = ('title');
+        return \@searchFields;
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCLeLivre.pm b/lib/gcstar/GCPlugins/GCbooks/GCLeLivre.pm new file mode 100644 index 0000000..ff4d6c4 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCLeLivre.pm @@ -0,0 +1,334 @@ +package GCPlugins::GCbooks::GCLeLivre; + +################################################### +# +# Copyright 2005-2006 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginLeLivre; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + + if (($tagname eq 'font') && ( $attr->{size} eq '-1') && ( $attr->{face} eq 'Courier New, Courier, mono') && ( $attr->{color} eq '#990000')) + { + $self->{itemIdx}++; + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'input') && ( $attr->{name} eq 'add')) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.le-livre.com/index.php?fich=fiche_info.php3&ref=" . 
$attr->{value}; + } + elsif (($tagname eq 'font') && ( $attr->{size} eq '-1') && ( $attr->{face} eq 'Courier New, Courier, mono') && ( $attr->{color} eq '#0000CC')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'font') && ( $attr->{size} eq '-1') && ( $attr->{face} eq 'Times New Roman, Times, serif')) + { + $self->{isPublisher} = 1 ; + } + } + else + { + if ($self->{isTitle} eq 3) + { + $self->{isTitle} = 0 ; + $self->{isAuthor} = 1 ; + } + elsif ($self->{isISBN} eq 1) + { + $self->{isISBN} = 2 ; + } + elsif ($self->{isISBN} eq 2) + { + $self->{isISBN} = 3 ; + } + elsif ($self->{isFormat} eq 1) + { + $self->{isFormat} = 2 ; + } + elsif ($self->{isFormat} eq 2) + { + $self->{isFormat} = 3 ; + } + elsif (($tagname eq 'font') && ( $attr->{color} eq '#990000') && ($self->{curInfo}->{title} eq '')) + { + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'font') && ( $attr->{size} eq '2') && ( $attr->{face} eq 'Arial, Helvetica, sans-serif') && ($self->{isTitle} eq 1)) + { + $self->{isTitle} = 2 ; + } + elsif (($tagname eq 'img') && ( index($attr->{src},"/photos/") >= 0) && ($self->{curInfo}->{cover} eq '')) + { + $self->{curInfo}->{cover} = $attr->{src}; + } + elsif (($tagname eq 'font') && ( $attr->{color} eq '#000099')) + { + $self->{isAnalyse} = 1 ; + } + elsif ($tagname eq 'tpftraducteurtpf') + { + $self->{isTranslator} = 1 ; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', '; + $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext; + } + $self->{isAuthor} = 0 ; + } + elsif 
($self->{isPublisher}) + { + $origtext =~ s|\.\.|\.|gi; + my @array = split(/\./,$origtext); + $self->{itemsList}[$self->{itemIdx}]->{edition} = $array[0]; + $self->{itemsList}[$self->{itemIdx}]->{publication} = $array[1]; + $self->{itemsList}[$self->{itemIdx}]->{format} = $array[2]; + $self->{isPublisher} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if ($self->{isTitle} eq 2) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 3 ; + } + elsif ($self->{isAuthor}) + { + $origtext =~ s|/ ||g; + $self->{curInfo}->{authors} .= $origtext; + $self->{curInfo}->{authors} .= ","; + $self->{isAuthor} = 0 ; + } + elsif ($self->{isTranslator}) + { + $self->{curInfo}->{translator} = $origtext; + $self->{isTranslator} = 0 ; + } + elsif ($self->{isAnalyse}) + { + $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i); + $self->{isFormat} = 1 if ($origtext =~ m/Descriptif/i); + + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isFormat} eq 3) + { + $origtext =~ s|\.\.|\.|gi; + my @array = split(/\./,$origtext); + $self->{curInfo}->{publisher} = $array[0]; + + # Enleve les blancs en debut de chaine + $array[1] =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $array[1] =~ s/\s+$//g; + $_= $array[1]; + if (/(.*)([0-9][0-9][0-9][0-9])(.*)/) + { + $self->{curInfo}->{publication} = $array[1]; + } + + # Enleve les blancs en debut de chaine + $array[2] =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $array[2] =~ s/\s+$//g; + $self->{curInfo}->{format} = $array[2]; + + my $element; + foreach $element (@array) + { + $element =~ s/^\s+//; + $_= $element; + if (/(^[0-9]+)(\s[p])(.*)/) + { + $self->{curInfo}->{pages} = $1; + } + elsif (/(^[Oo][u][v][r][a][g][e])(\s[e][n]\s)(.*)/) + { + $self->{curInfo}->{language} = $3; + } + } + + $self->{isFormat} = 0 ; + + } + elsif ($self->{isISBN} eq 3) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0 ; + } + } + 
} + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 1, + edition => 1, + serie => 0, + }; + + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isAnalyse} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isTranslator} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + } + else + { + + $html =~ s|: </font>|<tpfpourfaireunebalisetpf>|gi; + $html =~ s|Traduction de |<tpftraducteurtpf>|gi; + + $html =~ s|<u>||gi; + $html =~ s|<li>|\n* |gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<p>|\n|gi; + $html =~ s|</p>||gi; + $html =~ s|\x{92}|'|g; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + $html =~ s|…|...|gi; + $html =~ s|\x{85}|...|gi; + $html =~ s|\x{8C}|OE|gi; + $html =~ s|\x{9C}|oe|gi; + + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.le-livre.com/index.php?page=1&Categ=0&mot=". 
$word; + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return "Le-Livre"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-15"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'FR'; + } + + sub getSearchFieldsArray + { + return ['ISBN', 'title']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCLiberOnWeb.pm b/lib/gcstar/GCPlugins/GCbooks/GCLiberOnWeb.pm new file mode 100644 index 0000000..1b219aa --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCLiberOnWeb.pm @@ -0,0 +1,418 @@ +package GCPlugins::GCbooks::GCLiberOnWeb; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginLiberOnWeb; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + use Encode; + use HTML::Entities; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + + if (($tagname eq 'font') && ($attr->{color} eq '#E7E4D8') && ($attr->{face} eq 'Arial')) + { + $self->{itemIdx}++; + $self->{isBook} = 1 ; + $self->{isUrl} = 1 ; + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{color} eq '#D90000') && ($attr->{size} eq '3') && ($self->{isBook})) + { + $self->{isAuthor} = 0 ; + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{color} eq '#FFFFFF') && ($attr->{size} eq '2') && ($attr->{face} eq 'Arial') && ($self->{isBook})) + { + $self->{isPublisher} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m|libro.asp|i) && ($self->{isBook}) && ($self->{isUrl})) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.liberonweb.com/asp/" . 
$attr->{href}; + $self->{isUrl} = 0 ; + } + elsif (($tagname eq 'font') && ($attr->{color} eq '#D90000') && ($attr->{size} eq '5') && ($self->{searchField} eq 'isbn')) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl}; + } + } + else + { + if (($tagname eq 'font') && ($attr->{color} eq '#E7E4D8') && ($attr->{size} eq '4')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{color} eq '#D90000') && ($attr->{size} eq '5')) + { + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{face} eq 'Arial') && ($attr->{size} eq '2')) + { + $self->{isGenre} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{face} eq 'Verdana, Arial, Helvetica') && ($attr->{size} eq '2') && ($attr->{color} eq '')) + { + $self->{isFormat} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{color} eq '#6F6948') && ($attr->{size} eq '4')) + { + $self->{isAnalyse} = 0 ; + $self->{isDescription} = 1 ; + } + elsif ($tagname eq 'tpfserie') + { + $self->{isSerie} = 1 ; + } + elsif ($tagname eq 'tpfanalysecarac') + { + $self->{isSerie} = 0 ; + $self->{isAnalyse} = 1 ; + } + elsif ($tagname eq 'tpffindesc') + { + $self->{isDescription} = 0 ; + } + elsif (($tagname eq 'tpfsautdeligne') && ($self->{isDescription})) + { + $self->{curInfo}->{description} .= "\n"; + } + elsif (($tagname eq 'img') && ($attr->{src} =~ m|/images/books/|i)) + { + $self->{curInfo}->{cover} = 'http://www.liberonweb.com/asp/' .$attr->{src}; + + my $isbn = reverse($attr->{src}); + my $found = index($isbn,"/"); + if ( $found >= 0 ) + { + $isbn = substr($isbn, 0,$found); + $isbn = reverse($isbn); + $found = index($isbn,"."); + if ( $found >= 0 ) + { + $self->{curInfo}->{isbn} = substr($isbn, 0,$found); + } + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + 
$self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext; + $self->{isAuthor} = 0 ; + } + elsif ($self->{isPublisher}) + { + if (($origtext =~ m/Collana:/i) && ($self->{itemsList}[$self->{itemIdx}]->{edition} eq '')) + { + my @array = split(/-/,$origtext); + # Enleve les blancs en debut de chaine + $array[0] =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $array[0] =~ s/\s+$//g; + $self->{itemsList}[$self->{itemIdx}]->{edition} = $array[0]; + } + elsif (($origtext =~ m/Anno /i) && ($self->{itemsList}[$self->{itemIdx}]->{publication} eq '')) + { + my $found = index($origtext,"Anno "); + if ( $found >= 0 ) + { + $origtext = substr($origtext, $found +length('Anno '),length($origtext)- $found -length('Anno ')); + my @array = split(/,/,$origtext); + $self->{itemsList}[$self->{itemIdx}]->{publication} = $array[0]; + # Enleve les blancs en fin de chaine + $self->{itemsList}[$self->{itemIdx}]->{publication} =~ s/\s+$//g; + } + } + $self->{isPublisher} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isGenre}) + { + if ($origtext =~ m/Argomenti:/i) + { + my @array = split(/:/,$origtext); + # Enleve les blancs en debut de chaine + $array[1] =~ s/^\s+//; + $array[1] =~ s|, |,|gi; + $self->{curInfo}->{genre} = $array[1]; + } + $self->{isGenre} = 0 ; + } + elsif ($self->{isFormat}) + { + if ($origtext =~ m/Caratteristiche:/i) + { + my @array = split(/:/,$origtext); + # Enleve les blancs en debut de chaine + $array[1] =~ s/^\s+//; + $self->{curInfo}->{format} = $array[1]; + } + $self->{isFormat} = 0 ; + } + elsif ($self->{isAuthor}) + { + my @array = split(/-/,$origtext); + my $element; + + foreach $element (@array) + { + my @array 
= split(/\(/,$element); + # Enleve les blancs en debut de chaine + $array[0] =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $array[0] =~ s/\s+$//; + + if ($array[0] ne '') + { + $self->{curInfo}->{authors} .= $array[0]; + $self->{curInfo}->{authors} .= ","; + } + } + + $self->{isAuthor} = 0 ; + } + elsif ($self->{isSerie}) + { + if ($origtext =~ m/Collana:/i) + { + my @array = split(/:/,$origtext); + # Enleve les blancs en debut de chaine + $array[1] =~ s/^\s+//; + $self->{curInfo}->{serie} = $array[1]; + } + elsif (($origtext ne '') && ($self->{curInfo}->{serie} eq '')) + { + $self->{curInfo}->{publisher} = $origtext; + } + } + elsif ($self->{isAnalyse}) + { + + my @array = split(/ - /,$origtext); + my $element; + + foreach $element (@array) + { + # Enleve les blancs en debut de chaine + $element =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $element =~ s/\s+$//; + + if ($element =~ m/Pagine/i) + { + $element =~ s/Pagine //i; + $element =~ s/-/,/i; + my @array2 = split(/,/,$element); + if ($array2[1] eq '') + { + $self->{curInfo}->{pages} = $array2[0]; + } + else + { + $self->{curInfo}->{pages} = $array2[1]; + } + # Enleve les blancs en debut de chaine + $self->{curInfo}->{pages} =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $self->{curInfo}->{pages} =~ s/\s+$//; + } + elsif ($element =~ m/Anno/i) + { + my @array2 = split(/ /,$element); + $self->{curInfo}->{publication} = $array2[1]; + } + } + + } + elsif ($self->{isDescription}) + { + if ($origtext ne '') + { + $self->{curInfo}->{description} .= $origtext; + $self->{curInfo}->{description} .= "\n"; + } + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 0, + edition => 1, + }; + + $self->{isTitle} = 0; + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isAuthor} = 0; + $self->{isSerie} = 0; + 
$self->{isGenre} = 0; + $self->{isFormat} = 0; + $self->{isDescription} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + } + else + { + $html =~ s|\n||gi; + $html =~ s|\r||gi; + $html =~ s|\t||gi; + + $html =~ s|<li>|\n* |gi; + $html =~ s|<br>|<tpfsautdeligne>|gi; + $html =~ s|<br />|<tpfsautdeligne>|gi; + $html =~ s|<br clear=all>|<tpfsautdeligne>|gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<!--Visualizzazione delle Note del libro-->|<tpfanalysecarac>|gi; + $html =~ s|<!--Visualizzazione dell'Editore e della Collana-->|<tpfserie>|gi; + $html =~ s|<font face=Verdana, Arial, Helvetica size=2>|<font face="Verdana, Arial, Helvetica" size=2>|gi; + $html =~ s|<!--mstheme-->|<tpffindesc>|gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<p>|\n|gi; + $html =~ s|</p>||gi; + $html =~ s|\x{92}|'|g; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + return "http://www.liberonweb.com/asp/libro.asp?ISBN=" . $word; + } + else + { + return "http://www.liberonweb.com/asp/lista.asp?D1=Titolo&T1=" . $word. 
"&I1=1"; + } + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return "LiberOnWeb"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-15"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'IT'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCMareno.pm b/lib/gcstar/GCPlugins/GCbooks/GCMareno.pm new file mode 100644 index 0000000..1afdc67 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCMareno.pm @@ -0,0 +1,365 @@ +package GCPlugins::GCbooks::GCbooksMareno; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +my $searchURL = ""; + +{ + package GCPlugins::GCbooks::GCPluginMareno; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq 'title') #od razu mamy wynik? 
+ { + $self->{isBook} = 7; + } + + if (($tagname eq 'table') && ($attr->{class} eq 'bookData')) + { + $self->{itemIdx}++; + $self->{isBook} = 1; + } + if (($tagname eq 'a') && ($self->{isBook} == 1)) + { + $self->{isUrl} = 1; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.mareno.pl".$attr->{href}; + $self->{isUrl} = 0; + $self->{isTitle} = 1; + } + if (($tagname eq 'div') && ($attr->{class} eq 'bookAuthor') && ($self->{isBook} == 1)) + { + $self->{isAuthor} = 1; + $self->{isFormat} = 1; + $self->{isPublisher} = 1; + $self->{isPublication} = 1; + } + } + else + { + if (($tagname eq 'div') && ($attr->{id} eq 'wrgISBN')) + { + $self->{isISBN} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPAGES')) + { + $self->{isPage} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPUBLI')) + { + $self->{isPublisher} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPDATE')) + { + $self->{isPublication} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgFORMAT')) + { + $self->{isFormat} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgDESCR')) + { + $self->{isDescription} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgTITLE')) + { + $self->{isTitle} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgAUTOR')) + { + $self->{isAuthor} = 2; + } + if ($tagname eq 'a') + { + if ($self->{isAuthor} eq '1') + { + $self->{isAuthor} = 2; + } + elsif ($self->{isAuthor} eq '2') + { + $self->{isAuthor} = 1; + } + } + if (($tagname eq 'a') && ($attr->{href} =~ /okladki\/big/)) + { + $self->{isCover} = 1; + $self->{curInfo}->{cover} = "http://www.mareno.pl".$attr->{href}; + $self->{isCover} = 0; + } + } + } + + + sub end + { + my ($self, $tagname) = @_; + + if ($tagname eq 'table') + { + $self->{isBook} = 0; + } + if ($tagname eq 'div') + { + $self->{isAuthor} = 0; + } + + $self->{isFound} = 0; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if 
($self->{isBook} == 7) #od razu mamy wynik? + { + $origtext =~ s|^\s*||gs; + $origtext =~ s|\s*$||gs; + if (($origtext ne '') && ($origtext !~ /wyszukiwanie/)) + { + $self->{isUrl} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $searchURL; + $self->{isUrl} = 0; + } + $self->{isBook} = 0; + } + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + if ($self->{isAuthor} == 1) + { + my ($au, $fo, $pu, $pd); + $origtext =~ m|(#\^#- [^#]+#\^#)?(okładka\s*[^,]+,\s*)?([^,]+,\s*)?(\d*)?|s; + $au = $1; + $fo = $2; + $pu = $3; + $pd = $4; + $au =~ s|#\^#- ([^#]+)#\^#|$1|g; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $au; + $self->{isAuthor} = 0; + $fo =~ s|okładka\s*([^,]+),\s*|$1|g; + $self->{itemsList}[$self->{itemIdx}]->{format} = $fo; + $self->{isFormat} = 0; + $pu =~ s|([^,]+),\s*|$1|g; + $self->{itemsList}[$self->{itemIdx}]->{publisher} = $pu; + $self->{isPublisher} = 0; + $self->{itemsList}[$self->{itemIdx}]->{publication} = $pd; + $self->{isPublication} = 0; + } + if ($self->{isTitle} == 1) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + } + } + else + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + + if ($self->{isTitle} eq '1') + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0; + } + if ($self->{isAuthor} == 1) + { + $origtext =~ s|^\s*||; + $origtext =~ s|\s*$||; + if ($origtext ne '') + { + if ($self->{curInfo}->{authors} ne '') + { + $self->{curInfo}->{authors} .= ","; + } + $self->{curInfo}->{authors} .= $origtext; + } + $self->{isAuthor} = 2; + } + if ($self->{isFormat} == 1) + { + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + if ($self->{isDescription} == 1) + { + $self->{curInfo}->{description} = $origtext; + $self->{isDescription} = 0; + } + if ($self->{isISBN} eq '1') + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + if ($self->{isPage} eq '1') + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 
0; + } + if ($self->{isPublisher} eq '1') + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0; + } + if ($self->{isPublication} eq '1') + { + $origtext =~ s|(\S*)\s*(\S{4})|$2|; + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 1, + edition => 0, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditor_Publication_Format_Lang} = 0 ; + $self->{isAnalyse} = 0; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isEdition} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + $self->{isTranslator} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + $self->{actorsCounter} = 0; + + if ($self->{parsingList}) + { + $html =~ s/<\/?(b|strong)>//gi; + $html =~ s|</?font[^>]*>||gi; + $html =~ s|<br>|#\^#|gi; + $html =~ s|<TABLE border="0">\s*<tr>\s*<td valign=top>\s*</td>|<table border="0" class="bookData">|gs; + $html =~ s|<td valign=top align=center><a href="[^"]*" class="left-menulink">\s*<IMG SRC[^>]*></a></td>||gs; + $html =~ s|<td valign=top align=left><A HREF([^>]*)>\s*|<a href$1>|gm; + $html =~ s|</a> \s*|</a>\n<div class="bookAuthor">|gm; + $html =~ s|</td></tr>|</div>|g; + } + else + { + $html =~ s/<\/?(i|br|strong)>//gi; + + $html =~ s|<h1>([^<]*)</h1>|<div id="wrgTITLE">$1</div>|s; + $html =~ s|<h2><A(.*)</A></h2>|<div id="wrgAUTOR"><A$1</A></div>|s; + $html =~ s|<span class=textsmall>\s*ISBN:\s*([\dX]*)\s*</span>|<div id="wrgISBN">$1</div>|s; + $html =~ s|<span 
class=textsmall>\s*okładka:\s*([^,]*),?\s*(\d*)[^<]*</span>|<div id="wrgFORMAT">$1</div><div id="wrgPAGES">$2</div>|s; + $html =~ s|<span class=textsmall>\s*wydawnictwo:\s*([^,]*),\s*(\d*)\s*</span>|<div id="wrgPUBLI">$1</div><div id="wrgPDATE">$2</div>|s; + $html =~ s|opis produktu:\s*([^<]*)<hr>|<div id="wrgDESCR">$1</div><hr>|; +# $html =~ s|<dt>Seria:</dt>$*\s*<dd>(.*)</dd>|<div id="wrgSERIA">$1</div>|; +# $html =~ s|<dt>Wydanie:</dt><dd>(.*)</dd>|<div id="wrgEDITI">$1</div>|; +# $html =~ s|<dt>Tłumaczenie:\s*</dt>$*\s*<dd>|<dd id="wrgTRANS">|; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + $searchURL = "http://www.mareno.pl/rezultat.php?tytul=".$word; + return $searchURL; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url; + return 'http://www.mareno.pl/'; + } + + sub getName + { + return "Mareno"; + } + + sub getCharset + { + my $self = shift; + #return "UTF-8"; + return "ISO-8859-2"; + } + + sub getAuthor + { + return 'WG'; + } + + sub getLang + { + return 'PL'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCMediabooks.pm b/lib/gcstar/GCPlugins/GCbooks/GCMediabooks.pm new file mode 100644 index 0000000..6b5f41b --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCMediabooks.pm @@ -0,0 +1,333 @@ +package GCPlugins::GCbooks::GCMediabooks;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginMediabooks;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+
+ use Encode;
+ use HTML::Entities;
+
+    # HTML::Parser start-tag callback.
+    # Counts the open tag in $self->{inside} and raises the is* flags that
+    # text() later consumes. In list mode it also creates result entries
+    # and records their detail-page URL.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        my $isAnchor = ($tagname eq 'a');
+
+        unless ($self->{parsingList})
+        {
+            # Item (detail) page.
+            if ($isAnchor && ($attr->{href} =~ m|/autores/index.jsp|i))
+            {
+                $self->{isAuthor} = 1 ;
+            }
+            elsif ($isAnchor && ($attr->{href} =~ m|/editores/index.jsp|i))
+            {
+                $self->{isPublisher} = 1 ;
+            }
+            elsif ($self->{isISBN} eq 1)
+            {
+                # The ISBN label was seen by text(); any following tag
+                # promotes the flag so the next text node is the value.
+                $self->{isISBN} = 2 ;
+            }
+            elsif (($tagname eq 'span') && ($self->{isTitle}))
+            {
+                $self->{isTitle} = 2 ;
+            }
+            elsif (($tagname eq 'span') && ($attr->{class} eq 'font4Copy'))
+            {
+                $self->{isAnalyse} = 1 ;
+            }
+            elsif (($tagname eq 'img') && ($attr->{src} =~ m|/artigos/imagens/|i))
+            {
+                # Images whose tag mentions /artigos/imagens/livros are
+                # not stored as cover.
+                unless ($origtext =~ m|/artigos/imagens/livros|i)
+                {
+                    $self->{curInfo}->{cover} = 'http://www.mediabooks.pt' .$attr->{src};
+                }
+
+                $self->{isTitle} = 1 ;
+            }
+            return;
+        }
+
+        # Search-results page.
+        if (($tagname eq 'font') && ($attr->{class} eq 'font4Copy'))
+        {
+            $self->{isBook} = 1 ;
+            $self->{isUrl} = 1 ;
+            $self->{isDescription} = 0 ;
+        }
+        elsif ($isAnchor && ($attr->{href} =~ m|/artigos/popUp_detalhe.jsp|i) && ($self->{isBook}) && ($self->{isUrl}))
+        {
+            my $idx = ++$self->{itemIdx};
+            my $link = $attr->{href};
+
+            # When the href contains a single quote, keep only what
+            # follows the first quote (up to the next quote, if any) and
+            # prefix it with the site root.
+            if ($link =~ m/'([^']*)/)
+            {
+                $link = "http://www.mediabooks.pt" . $1;
+            }
+            $self->{itemsList}[$idx]->{url} = $link;
+
+            $self->{isTitle} = 1 ;
+            $self->{isUrl} = 0 ;
+        }
+        elsif ($isAnchor && ($attr->{href} =~ m|/autores/index.jsp|i) && ($self->{isBook}))
+        {
+            $self->{isAuthor} = 1 ;
+        }
+        elsif ($isAnchor && ($attr->{href} =~ m|/editores/index.jsp|i) && ($self->{isBook}))
+        {
+            $self->{isPublisher} = 1 ;
+        }
+        elsif (($tagname eq 'input') && ($attr->{type} eq 'hidden'))
+        {
+            $self->{isBook} = 0 ;
+        }
+    }
+
+    # HTML::Parser end-tag callback: clears isFound and decrements the
+    # open-tag counter kept in $self->{inside}.
+    sub end
+    {
+        my $self      = shift;
+        my $closedTag = shift;
+
+        $self->{isFound} = 0 ;
+        $self->{inside}->{$closedTag}--;
+    }
+
+    # HTML::Parser text callback.
+    # Routes the text node to the field selected by the is* flags that
+    # start() raised, then lowers the flag. List mode fills the current
+    # entry of $self->{itemsList}; item mode fills $self->{curInfo}.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isTitle})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+                $self->{isTitle} = 0 ;
+                return;
+            }
+            if ($self->{isAuthor} eq 1)
+            {
+                # Strip line feeds and carriage returns
+                $origtext =~ s/\n//g;
+                $origtext =~ s/\r//g;
+                my $noAuthorYet = ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '');
+                if ($origtext ne '')
+                {
+                    if ($noAuthorYet)
+                    {
+                        $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext;
+                    }
+                    else
+                    {
+                        $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', ' . $origtext;
+                    }
+                }
+                $self->{isAuthor} = 0 ;
+                return;
+            }
+            if ($self->{isPublisher})
+            {
+                # The publisher name is stored in the list's 'edition' field.
+                $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+                $self->{isPublisher} = 0 ;
+            }
+            return;
+        }
+
+        # Strip leading whitespace
+        $origtext =~ s/^\s+//;
+        # Strip trailing whitespace
+        $origtext =~ s/\s+$//g;
+
+        if ($self->{isTitle} eq '2')
+        {
+            $self->{curInfo}->{title} = $origtext;
+            $self->{isTitle} = 0 ;
+            return;
+        }
+        if ($self->{isAuthor} eq 1)
+        {
+            # Each author is appended followed by a comma.
+            $self->{curInfo}->{authors} .= $origtext . "," if ($origtext ne '');
+            $self->{isAuthor} = 0 ;
+            return;
+        }
+        if ($self->{isAnalyse})
+        {
+            # A label cell: decide which value the following text holds.
+            my @labels = (
+                [ isISBN        => qr/ISBN/i ],
+                [ isFormat      => qr/Formato/i ],
+                [ isDescription => qr/Breve Descri/i ],
+                [ isPublication => qr/Ano de Edi/i ],
+                [ isPage        => qr/P.ginas/i ],
+            );
+            foreach my $label (@labels)
+            {
+                my ($flag, $pattern) = @{$label};
+                $self->{$flag} = 1 if ($origtext =~ $pattern);
+            }
+
+            $self->{isAnalyse} = 0 ;
+            return;
+        }
+        if ($self->{isISBN} eq 2)
+        {
+            $self->{curInfo}->{isbn} = $origtext;
+            $self->{isISBN} = 0 ;
+            return;
+        }
+
+        # One-shot fields, checked in the same priority order as before.
+        my @oneShot = (
+            [ isPublisher   => 'publisher' ],
+            [ isFormat      => 'format' ],
+            [ isPublication => 'publication' ],
+            [ isPage        => 'pages' ],
+        );
+        foreach my $entry (@oneShot)
+        {
+            my ($flag, $field) = @{$entry};
+            next unless $self->{$flag};
+            $self->{curInfo}->{$field} = $origtext;
+            $self->{$flag} = 0 ;
+            return;
+        }
+
+        if ($self->{isDescription})
+        {
+            # This sub never lowers isDescription, so every further text
+            # node is appended.
+            $self->{curInfo}->{description} .= $origtext;
+        }
+    }
+
+    # Constructor: builds the parser through the parent class, declares
+    # which columns the result list shows (hasField) and resets every
+    # parsing-state flag used by start()/text().
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        # Columns filled while parsing the search-results page.
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+            publication => 0,
+            format => 0,
+            edition => 1,
+        };
+
+        $self->{isFound} = 0;
+        $self->{isBook} = 0;
+        $self->{isUrl} = 0;
+        $self->{isTitle} = 0;
+        $self->{isAuthor} = 0;
+        # isAnalyse is read by start()/text(); it used to be left
+        # undefined here, unlike every other state flag.
+        $self->{isAnalyse} = 0;
+        # Not referenced by this plugin's handlers; kept so the object
+        # layout stays unchanged.
+        $self->{isFormatPublication} = 0;
+        $self->{isPublisher} = 0;
+        $self->{isISBN} = 0;
+        $self->{isPublication} = 0;
+        $self->{isFormat} = 0;
+        $self->{isPage} = 0;
+        $self->{isDescription} = 0;
+
+        return $self;
+    }
+
+    # Normalises the raw HTML before it is parsed.
+    # The search-results page is returned untouched. On an item page the
+    # line breaks and tabs are dropped, a few tags are turned into plain
+    # text (newlines / bullets) and some punctuation is simplified.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        return $html if ($self->{parsingList});
+
+        # Applied strictly in this order: raw whitespace must be removed
+        # before tags are converted into newlines.
+        my @rewrites = (
+            [ qr|\n|i,     ''     ],
+            [ qr|\r|i,     ''     ],
+            [ qr|\t|i,     ''     ],
+            [ qr|<li>|i,   "\n* " ],
+            [ qr|<br>|i,   "\n"   ],
+            [ qr|<br />|i, "\n"   ],
+            [ qr|<b>|i,    ''     ],
+            [ qr|</b>|i,   ''     ],
+            [ qr|<i>|i,    ''     ],
+            [ qr|</i>|i,   ''     ],
+            [ qr|<p>|i,    "\n"   ],
+            [ qr|</p>|i,   ''     ],
+            [ qr|</h4>|i,  ''     ],
+            [ qr|\x{92}|,  "'"    ],
+            [ qr|’|i,      "'"    ],
+            [ qr|•|i,      '*'    ],
+        );
+        foreach my $rewrite (@rewrites)
+        {
+            my ($pattern, $replacement) = @{$rewrite};
+            $html =~ s/$pattern/$replacement/g;
+        }
+
+        return $html;
+    }
+
+    # Returns the search request as a two-element list: the form URL and
+    # an array reference of form field name/value pairs. The v_pes_id
+    # field is "2" for an ISBN search and "1" otherwise.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        my $searchKind = ($self->{searchField} eq 'isbn') ? "2" : "1";
+
+        return ('http://www.mediabooks.pt/pesquisa/result_pesq.jsp', ["v_sec_id" => "1", "v_prev_sec_id" => "", "v_pes_id" => $searchKind, "v_pesquisa" => "$word", "image.x" => "5", "image.y" => "7"] );
+    }
+
+    # Returns the given item URL, or the site home page when the URL is
+    # empty/false.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url || 'http://www.mediabooks.pt/';
+    }
+
+    # Display name of this plugin.
+    sub getName
+    {
+        return 'Mediabooks';
+    }
+
+    # Identifier of the plugin's author.
+    sub getAuthor
+    {
+        return "TPF";
+    }
+
+    # Language code reported for this plugin.
+    sub getLang
+    {
+        return "PT";
+    }
+
+    # Fields this plugin can search on, as an array reference.
+    sub getSearchFieldsArray
+    {
+        return [ qw(isbn title) ];
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCMerlin.pm b/lib/gcstar/GCPlugins/GCbooks/GCMerlin.pm new file mode 100644 index 0000000..5c5129a --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCMerlin.pm @@ -0,0 +1,389 @@ +package GCPlugins::GCbooks::GCbooksMerlin; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginMerlin; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if (($tagname eq 'li') && ($attr->{class} eq 'tytul')) + { + $self->{isBook} = 1; + $self->{isUrl} = 1; + $self->{itemIdx}++; + } + if (($tagname eq 'li') && ($attr->{class} eq 'wydawca')) + { + $self->{isPublisher} = 1; + } + if (($tagname eq 'a') + && ($self->{isUrl} eq '1')) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.merlin.com.pl".$attr->{href}; + $self->{isUrl} = 0; + } + } + else + { + if (($tagname eq 'div') && ($attr->{id} eq 'wrgISBN')) + { + $self->{isISBN} = 1; + } + if 
(($tagname eq 'div') && ($attr->{id} eq 'wrgPAGES')) + { + $self->{isPage} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPUBLI')) + { + $self->{isPublisher} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPDATE')) + { + $self->{isPublication} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgSERIA')) + { + $self->{isSerie} = 2; + } + if (($tagname eq 'a') && ($self->{isSerie} eq '2')) + { + $self->{isSerie} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgEDITI')) + { + $self->{isEdition} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'prodHead')) + { + $self->{isCover} = 2; + $self->{isTitle} = 2; + $self->{isFormat} = 2; + } + if (($tagname eq 'h1') && ($attr->{class} eq 'prodTitle') && ($self->{isTitle} eq '2')) + { + $self->{isTitle} = 1; + } + if (($tagname eq 'h2') && ($attr->{class} eq 'prodPerson')) + { + $self->{isAuthor} = 2; + } + if ($tagname eq 'a') + { + if ($self->{isAuthor} eq '1') + { + $self->{isAuthor} = 2; + } + elsif ($self->{isAuthor} eq '2') + { + $self->{isAuthor} = 1; + } + } + if (($tagname eq 'dd') && ($attr->{id} eq 'wrgTRANS')) + { + $self->{isTranslator} = 2; + } + if ($tagname eq 'a') + { + if ($self->{isTranslator} eq '1') + { + $self->{isTranslator} = 2; + } + elsif ($self->{isTranslator} eq '2') + { + $self->{isTranslator} = 1; + } + } + if (($tagname eq 'div') && ($attr->{id} eq 'prodImg') && ($self->{isCover} eq '2')) + { + $self->{isCover} = 1; + } + if (($tagname eq 'img') && ($self->{isCover} eq '1')) + { + $self->{curInfo}->{cover} = "http://www.merlin.com.pl".$attr->{src}; + $self->{isCover} = 0; + } + if (($tagname eq 'div') && ($attr->{class} eq 'prodFeatureSpec') && ($self->{isFormat} eq '2')) + { + $self->{isFormat} = 1; + } + if (($tagname eq 'div') && ($attr->{class} eq 'productDesc')) + { + $self->{isDescription} = 1; + } + } + } + + + sub end + { + my ($self, $tagname) = @_; + + if ($tagname eq 'h2') + { + $self->{isAuthor} = 0; + } + if ($tagname eq 'dd') + { + 
$self->{isTranslator} = 0; + } + + $self->{isFound} = 0; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isBook} eq '1') + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + $self->{isBook} = 0; + if ($self->{inside}->{a}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isBook} = 1; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext; + } + } + if ($self->{isPublisher} eq '1') + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + $self->{isPublisher} = 0; + } + + } + else + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + + if ($self->{isTitle} eq '1') + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0; + } + if ($self->{isAuthor} eq '1') + { + $origtext =~ s|^\s*||; + $origtext =~ s|\s*$||; + if ($origtext ne '') + { + $self->{curInfo}->{authors} .= $origtext; + } + $self->{isAuthor} = 2; + } + if ($self->{isTranslator} eq '1') + { + $origtext =~ s|^\s*||; + $origtext =~ s|\s*$||; + if ($self->{curInfo}->{translator} eq '') + { + $self->{curInfo}->{translator} = $origtext; + } + else + { + $self->{curInfo}->{translator} .= ", ".$origtext; + } + $self->{isTranslator} = 2; + } + if ($self->{isFormat} eq '1') + { + $origtext =~ s|okładka: ||m; + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + if ($self->{isDescription} eq '1') + { + $self->{curInfo}->{description} = $origtext; + $self->{isDescription} = 0; + } + + if ($self->{isISBN} eq '1') + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + if ($self->{isPage} eq '1') + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + if ($self->{isEdition} eq '1') + { + $self->{curInfo}->{edition} = $origtext; + $self->{isEdition} = 0; + } + if ($self->{isPublisher} eq '1') + { + $self->{curInfo}->{publisher} = $origtext; + 
$self->{isPublisher} = 0; + } + if ($self->{isPublication} eq '1') + { + $origtext =~ s|(\S*)\s*(\S{4})|$2|; + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0; + } + if ($self->{isSerie} eq '1') + { + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 1, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditor_Publication_Format_Lang} = 0 ; + $self->{isAnalyse} = 0; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isEdition} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + $self->{isTranslator} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + $self->{actorsCounter} = 0; + + if ($self->{parsingList}) + { + $html =~ s|<b>(.*?)</b>|$1|gms; + $html =~ s|<li class="tytul">(.*)</li>\s*<li>|<li class="tytul">$1</li><li class="wydawca">|gm; + } + else + { + $html =~ s|</?strong>||gi; + $html =~ s|</?i>||gi; + $html =~ s|</?br>||gi; + $html =~ s|<dfn>(.*?)</dfn>||gs; + + $html =~ s|<dt>ISBN:</dt><dd>(.*)</dd>|<div id="wrgISBN">$1</div>|; + $html =~ s|<dt>Liczba stron:</dt><dd>(.*)</dd>|<div id="wrgPAGES">$1</div>|; + $html =~ s|<dt>Seria:</dt>\s*<dd>(.*)</dd>|<div id="wrgSERIA">$1</div>|m; + $html =~ s|<dt>Wydanie:</dt><dd>(.*)</dd>|<div id="wrgEDITI">$1</div>|; + $html =~ s|<dt>Wydawnictwo:</dt>\s*<dd>\s*(.*)\s*,*\s*(.*)\s*</dd>|<div id="wrgPUBLI">$1</div><div id="wrgPDATE">$2</div>|m; + $html =~ s|<dt>Tłumaczenie:\s*</dt>\s*<dd>|<dd id="wrgTRANS">|m; + } + + return 
$html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + return "http://www.merlin.com.pl/frontend/browse/search/1.html?phrase=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url; + return 'http://www.merlin.com.pl/'; + } + + sub getName + { + return "Merlin"; + } + + sub getCharset + { + my $self = shift; + #return "UTF-8"; + return "ISO-8859-2"; + } + + sub getAuthor + { + return 'WG'; + } + + sub getLang + { + return 'PL'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCNUKat.pm b/lib/gcstar/GCPlugins/GCbooks/GCNUKat.pm new file mode 100644 index 0000000..6bc22eb --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCNUKat.pm @@ -0,0 +1,447 @@ +package GCPlugins::GCbooks::GCbooksNUKat; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +my $searchURL = ""; +my $searchISBN = ""; + +{ + package GCPlugins::GCbooks::GCPluginNUKat; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq 'title') #od razu mamy wynik + { + $self->{isBook} = 7; + } + + if (($tagname eq 'tr') && ($attr->{class} eq 'intrRow')) + { + $self->{isBook} = 1; + $self->{itemIdx}++; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrRowCell1') && ($self->{isBook} == 1)) + { + $self->{isUrl} = 2; + } + if (($tagname eq 'a') && ($self->{isUrl} == 2) && ($origtext =~ /.*function=CARDSCR.*/)) + { + $self->{isUrl} = 1; + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{itemsList}[$self->{itemIdx}]->{url} =~ s|skin=portal&||; + $self->{isUrl} = 0; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrAutor') && ($self->{isBook} == 1)) + { + $self->{isAuthor} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrTytul') && ($self->{isBook} == 1)) + { + $self->{isTitle} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrWydaw') && ($self->{isBook} == 1)) + { + $self->{isPublication} = 1; + } + } + else + { + if (($tagname eq 'td') && ($attr->{class} eq 'wrgTITLE')) + { + $self->{isTitle} = 1; + $self->{isAuthor} = 1; + $self->{isTranslator} = 1; + $self->{isArtist} = 1; + $self->{isISBN} = 2; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgPAGES')) + { + $self->{isPage} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgSERIA')) + { + $self->{isSerie} = 1; + } + if 
(($tagname eq 'td') && ($attr->{class} eq 'wrgPUBLI')) + { + $self->{isPublisher} = 1; + $self->{isPublication} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgEDITI')) + { + $self->{isEdition} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgISBN') && ($self->{isISBN} == 2)) + { + $self->{isISBN} = 1; + } + + if (($tagname eq 'div') && ($attr->{class} eq 'prodFeatureSpec') && ($self->{isFormat} eq '2')) + { + $self->{isFormat} = 1; + } + } + } + + + sub end + { + my ($self, $tagname) = @_; + + $self->{isFound} = 0; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isBook} == 7) #od razu mamy wynik? + { + if ($origtext =~ /Pełny opis/) + { + $self->{isUrl} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $searchURL; + $self->{isUrl} = 0; + $self->{isBook} = 0; + } + } + if ($self->{isBook} == 1) + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + if ($self->{isTitle} == 1) + { + $origtext =~ s|^\s*([^/]*)/?|$1|m; + $origtext =~ s|^\s*([^:]*):?|$1|m; + $origtext =~ s|\s*$||m; + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + } + if ($self->{isAuthor} == 1) + { + $origtext =~ s|\s*\/\s*(.*)\s*|$1|; + $origtext =~ s|^\s*([^\.]*)\.?|$1|m; + $origtext =~ s|([^\(]*)(\([^\)]*\))?|$1|; + $origtext =~ s|\s*$||m; + $origtext =~ s|([^,]*), (.*)|$2 $1|m; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext; + $self->{isAuthor} = 0; + } + if ($self->{isPublication} == 1) + { + $origtext =~ s|(.*)(\d{4})\D*|$2|s; + $origtext =~ s|^\s*([^\.]*)\.?|$1|m; + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + $self->{isPublication} = 0; + $self->{isBook} = 0; + } + } + + } + else + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + + if ($self->{isFormat} eq '1') + { + $origtext =~ s|okładka: ||m; + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + + + + if 
($self->{isISBN} eq '1') + { + my ($pom1, $pom2); + if ($self->{searchField} eq 'isbn') + { + $pom1 = $self->{searchISBN}; + $pom2 = $origtext; + $pom2 =~ s|[^\dX]||g; + $pom1 =~ s|-||g; + $pom2 =~ s|-||g; + if ($pom1 eq $pom2) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + else + { + $self->{isISBN} = 2; + } + } + else + { + $origtext =~ s|[^\dX]||g; + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + } + if ($self->{isTitle} eq '1') + { + my ($pom1, $pom2, $ti, $au, $tr, $il); + $origtext =~ m|([^/]*)/\s*([^;]*)(; )?([^;]*)(; )?([^;]*)$|; + $ti = $1; + $au = $2; + $pom1 = $4; + $pom2 = $6; + $ti =~ s|^\s*||; + $ti =~ s|\s*$||; + $self->{curInfo}->{title} = $ti; + $self->{isTitle} = 0; + $au =~ s| i |,|g; + $au =~ s|, |,|g; + $au =~ s|[\[\]]||g; + $au =~ s|tekst||g; + $au =~ s|^\s*||; + $au =~ s|\s*$||; + $au =~ s|(.*)(\.{1})|$1|; + $self->{curInfo}->{authors} = $au; + $self->{isAuthor} = 0; + $pom1 =~ s|[\[\]]||g; + $pom1 =~ m|(.*)(.{1})|; + if ($2 eq '.') + { + $pom1 = $1; + } + $pom2 =~ s|[\[\]]||g; + $pom2 =~ m|(.*)(.{1})|; + if ($2 eq '.') + { + $pom2 = $1; + } + if ($pom2 =~ /(przeł\.|przekł\.|tł\.|tłum\.)/) + { + $tr = $pom2; + } + if ($pom2 =~ /(il\.|oprac\. graf\.)/) + { + $il = $pom2; + } + if ($pom1 =~ /(przeł\.|przekł\.|tł\.|tłum\.)/) + { + $tr = $pom1; + } + if ($pom1 =~ /(il\.|oprac\. graf\.)/) + { + $il = $pom1; + } + $tr =~ s/(przeł\.|przekł\.|tł\.|tłum\.)//; + $tr =~ s|z \w+\.||; + $tr =~ s|^\s*||; + $tr =~ s|\s*$||; + $tr =~ s| i |,|g; + $tr =~ s|, |,|g; + $self->{curInfo}->{translator} = $tr; + $self->{isTranslator} = 0; + $il =~ s/(il\.|oprac\. 
graf\.)//; + $il =~ s|^\s*||; + $il =~ s|\s*$||; + $il =~ s| i |,|g; + $il =~ s|, |,|g; + $self->{curInfo}->{artist} = $il; + $self->{isArtist} = 0; + } + if ($self->{isPage} eq '1') + { + $origtext =~ s|(\d*)\D.*|$1|; + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + if ($self->{isEdition} eq '1') + { + $origtext =~ s|\D*(\d*)\D.*|$1|; + $self->{curInfo}->{edition} = $origtext; + $self->{isEdition} = 0; + } + if ($self->{isPublisher} eq '1') + { + my $pom = $origtext; + $origtext =~ s|[^:]*:\s*(.*),.*|$1|; + $origtext =~ s|^\s*||; + $origtext =~ s|"(.*)"|$1|; + $self->{curInfo}->{publisher} = $origtext; + $pom =~ s|(.*)(\d{4})(\D*)|$2|; + $self->{curInfo}->{publication} = $pom; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + } + if ($self->{isSerie} eq '1') + { + $origtext =~ s|([^;]*)(;.*)|$1|; + $origtext =~ s|\s*$||; + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 1, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditor_Publication_Format_Lang} = 0 ; + $self->{isAnalyse} = 0; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isEdition} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + $self->{isTranslator} = 0; + $self->{isArtist} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + $self->{actorsCounter} = 0; + + if ($self->{parsingList}) + { + $html =~ s|<b>(.*?)</b>|$1|gms; + $html =~ s|<img .*/book.gif">||g; + $html =~ s|<font.*</font>||g; + 
$html =~ s|<span class="highlight[^>]+>||g; + $html =~ s|</?span[^>]*>||g; + $html =~ s|<th[^>]*>Autor</th>\s*<td><a[^>]*>([^<]*)</a>|<td class="intrAutor">$1|gs; + $html =~ s|<th[^>]*>Tytuł</th>\s*<td><a[^>]*>([^<]*)</a>|<td class="intrTytul">$1|gs; + $html =~ s|<th[^>]*>Adres wyd.</th>\s*<td>|<td class="intrWydaw">|gs; + } + else + { + $html =~ s|</?strong>||gi; + $html =~ s|</?i>||gi; + $html =~ s|</?br>||gi; + + $html =~ s|<th[^>]*>Tytuł</th>\s*<td>\s*<a[^>]*>([^<]*)</a>|<td class="wrgTITLE">$1|gs; + $html =~ s|<th[^>]*>Strefa serii</th>\s*<td>\s*<a[^>]*>([^<]*)</a>|<td class="wrgSERIA">$1|gs; + $html =~ s|<th[^>]*>Adres wydawniczy</th>\s*<td>|<td class="wrgPUBLI">|gs; + $html =~ s|<th[^>]*>Opis fizyczny</th>\s*<td>|<td class="wrgPAGES">|gs; + $html =~ s|<th[^>]*>Oznaczenie wydania</th>\s*<td>|<td class="wrgEDITI">|gs; + $html =~ s|<th[^>]*>ISBN</th>\s*<td>|<td class="wrgISBN">|gs; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + my $bubu; + if ($self->{searchField} eq 'isbn') + { + $bubu = "7"; + $self->{searchISBN} = $word; + } + else + { + $bubu = "4"; + $self->{searchISBN} = ""; + } + $searchURL = "http://www.nukat.edu.pl/cgi-bin/gw_43_3/chameleon?host=193.0.118.2%2b1111%2bDEFAULT&search=KEYWORD&function=INITREQ&conf=.%2fchameleon.conf&lng=pl&u1=".$bubu."&t1=".$word; + return $searchURL; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url; + return 'http://www.nukat.edu.pl/'; + } + + sub getName + { + return "NUKat"; + } + + sub getCharset + { + my $self = shift; + return "UTF-8"; + #return "ISO-8859-2"; + } + + sub getAuthor + { + return 'WG'; + } + + sub getLang + { + return 'PL'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCNooSFere.pm b/lib/gcstar/GCPlugins/GCbooks/GCNooSFere.pm new file mode 100644 index 0000000..c878af9 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCNooSFere.pm @@ -0,0 +1,462 @@ +package 
GCPlugins::GCbooks::GCNooSFere;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginNooSFere;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+
+    # HTML::Parser start-tag callback.
+    # Counts the open tag in $self->{inside} and raises the is* flags that
+    # text() consumes. In list mode it also creates the result entries:
+    # one per "niourf.asp?numlivre=" link, using the title/author that
+    # text() saved in saveTitle/saveAuthor.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            # isFound == 2 is set by text() once the loaded page itself
+            # has been recorded as the single result; ignore further tags.
+            return if ( $self->{isFound} eq 2 );
+            if (($tagname eq 'td') && ($attr->{class} eq 'onglet_bleu'))
+            {
+                $self->{isFound} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m/editionslivre.asp\?numitem=/i) && !($attr->{href} =~ m/numediteur=/i) && !($attr->{href} =~ m/tri=/i))
+            {
+                $self->{isTitle} = 1 ;
+                $self->{isAuthor} = 0 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|/icarus/livres/auteur.asp\?NumAuteur=|i) && ($self->{isAuthor} eq 0))
+            {
+                $self->{isAuthor} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|./editeur.asp\?numediteur=|i))
+            {
+                $self->{isPublisher} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|./serie.asp\?NumSerie=|i))
+            {
+                $self->{isSerie} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m/editionslivre.asp\?numitem=/i) && ($attr->{href} =~ m/numediteur=/i))
+            {
+                # Fetch the linked page to see whether it lists several
+                # editions.
+                # NOTE(review): meaning of loadPage's extra arguments
+                # (0, 1) is not visible here - confirm against the base class.
+                my $html = $self->loadPage( "http://www.noosfere.org/icarus/livres/" . $attr->{href}, 0, 1 );
+                if ( index($html,"Fiche livre : les éditions") >= 0 )
+                {
+                    # One result per edition link found in the page.
+                    # index() returns -1 when the marker is absent and 0
+                    # when it sits at the very start, so the loop must
+                    # compare against 0 instead of testing truthiness.
+                    my $marker = './niourf.asp?numlivre=';
+                    my $found;
+                    while (($found = index($html, $marker)) >= 0)
+                    {
+                        # Drop everything up to and including the marker;
+                        # what remains starts with the book number.
+                        $html = substr($html, $found + length($marker));
+                        $self->{itemIdx}++;
+                        $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{saveTitle};
+                        $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{saveAuthor};
+                        $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.noosfere.org/icarus/livres/niourf.asp?numlivre=" . substr($html, 0, index($html,"\""));
+                    }
+                }
+                else
+                {
+                    # Single edition: the link itself is the result.
+                    $self->{itemIdx}++;
+                    $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{saveTitle};
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{saveAuthor};
+                    $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.noosfere.org/icarus/livres/" . $attr->{href};
+                }
+            }
+            elsif ($tagname eq 'h1')
+            {
+                $self->{isTitle} = 1 ;
+                $self->{isAuthor} = 0 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|./niourf.asp\?numlivre=|i))
+            {
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{saveTitle};
+                $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{saveAuthor};
+                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.noosfere.org/icarus/livres/" . $attr->{href};
+            }
+            elsif (($tagname eq 'td') && ($attr->{class} eq 'onglet_biblio1'))
+            {
+                $self->{isAuthor} = 2 ;
+            }
+            elsif (($tagname eq 'table') && ($attr->{class} eq 'piedpage'))
+            {
+                $self->{isAuthor} = 0 ;
+            }
+        }
+        else
+        {
+            # Item (detail) page.
+            if (($tagname eq 'mytpf') && ($attr->{id} eq 'TPFENDCOMMENTTPF'))
+            {
+                $self->{isDescription} = 0 ;
+            }
+            elsif (($tagname eq 'font') && ($attr->{class} eq 'TitreNiourf'))
+            {
+                $self->{isAnalyse} = 0 ;
+                $self->{isTitle} = 1 ;
+                $self->{isAuthor} = 0 ;
+            }
+            elsif (($tagname eq 'font') && ($attr->{class} eq 'AuteurNiourf'))
+            {
+                $self->{isAuthor} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|/icarus/livres/auteur.asp\?NumAuteur=|i) && ($self->{isAuthor} eq 1))
+            {
+                $self->{isAuthor} = 2 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|actu_mois.asp\?|i))
+            {
+                $self->{isPublication} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|editeur.asp\?numediteur=|i) && ($self->{curInfo}->{publisher} eq ''))
+            {
+                # Only the first publisher link is used.
+                $self->{isPublisher} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|collection.asp\?NumCollection=|i) && ($self->{curInfo}->{serie} eq ''))
+            {
+                # Only the first collection link is used.
+                $self->{isSerie} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|/icarus/livres/auteur.asp\?NumAuteur=|i) && ($self->{isTranslator} eq 1))
+            {
+                $self->{isTranslator} = 2 ;
+            }
+            elsif ($tagname eq 'br')
+            {
+                $self->{isAnalyseTrans} = 1 ;
+            }
+            elsif (($tagname eq 'font') && ($attr->{style} eq 'font-size:12px;') && ($self->{isAnalyse} eq 0))
+            {
+                $self->{isAnalyse} = 1 ;
+            }
+            elsif (($tagname eq 'img') && ($attr->{name} eq 'couverture'))
+            {
+                $self->{curInfo}->{cover} = "http://www.noosfere.org/icarus/livres/" . $attr->{src} ;
+            }
+            elsif (($tagname eq 'mytpf') && ($attr->{id} eq 'TPFSTARTCOMMENTTPF'))
+            {
+                $self->{isDescription} = 1 ;
+            }
+        }
+    }
+
+    # HTML::Parser end-tag callback: clears isFound and decrements the
+    # open-tag counter kept in $self->{inside}.
+    sub end
+    {
+        my $self      = shift;
+        my $closedTag = shift;
+
+        $self->{isFound} = 0 ;
+        $self->{inside}->{$closedTag}--;
+    }
+
+ # Character-data handler: routes each text chunk to the field selected by
+ # the state flags held on $self.
+ #
+ # While $self->{parsingList} is true the chunk feeds the search-result list
+ # (saveTitle / saveAuthor and the current itemsList entry); otherwise it
+ # feeds the item details collected in $self->{curInfo}.
+ #
+ # Parameters:
+ #   $self     - plugin instance carrying the flags and the collected data
+ #   $origtext - text chunk; only local copies of it are modified
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     # Returns its argument with leading and trailing whitespace removed.
+     my $strip = sub
+     {
+         my ($chunk) = @_;
+         $chunk =~ s/^\s+//;
+         $chunk =~ s/\s+$//g;
+         return $chunk;
+     };
+
+     # Appends a name to a comma-separated list that may still be empty.
+     my $addName = sub
+     {
+         my ($names, $name) = @_;
+         return ($names eq '') ? $name : $names . ', ' . $name;
+     };
+
+     if ($self->{parsingList})
+     {
+         if ($self->{isTitle})
+         {
+             # A new title also starts a fresh author list.
+             $self->{saveTitle} = $strip->($origtext);
+             $self->{saveAuthor} = '';
+             $self->{isTitle} = 0;
+         }
+         elsif ($self->{isAuthor} eq 1)
+         {
+             my $name = $strip->($origtext);
+             if ($name ne '')
+             {
+                 $self->{saveAuthor} = $addName->($self->{saveAuthor}, $name);
+             }
+             $self->{isAuthor} = 0;
+         }
+         elsif ($self->{isPublisher})
+         {
+             # Stored untrimmed, under the "edition" key of the list entry.
+             $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+             $self->{isPublisher} = 0;
+         }
+         elsif ($self->{isSerie})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{serie} = $origtext;
+             $self->{isSerie} = 0;
+         }
+         elsif ($self->{isFound} eq 1)
+         {
+             # "Fiche livre" opens a new list entry pointing at the page that
+             # was loaded (presumably the search landed directly on a single
+             # book page - confirm against the start handler).
+             if ($strip->($origtext) eq 'Fiche livre')
+             {
+                 my $slot = ++$self->{itemIdx};
+                 $self->{itemsList}[$slot]->{url} = $self->{loadedUrl};
+                 $self->{isFound} = 2;
+             }
+             else
+             {
+                 $self->{isFound} = 0;
+             }
+         }
+     }
+     else
+     {
+         # Every chunk of an item page is trimmed before use.
+         my $chunk = $strip->($origtext);
+
+         if ($self->{isTitle} eq 1)
+         {
+             $self->{curInfo}->{title} = $chunk;
+             $self->{isTitle} = 0;
+         }
+         elsif ($self->{isAnalyse} eq 1)
+         {
+             # Page count: everything that precedes " pages".
+             my $pagesAt = index($chunk, ' pages');
+             $self->{curInfo}->{pages} = substr($chunk, 0, $pagesAt)
+                 if ($pagesAt >= 0);
+
+             # ISBN: everything that follows the "ISBN : " label.
+             my $isbnLabel = 'ISBN : ';
+             my $isbnAt = index($chunk, $isbnLabel);
+             $self->{curInfo}->{isbn} = substr($chunk, $isbnAt + length($isbnLabel))
+                 if ($isbnAt >= 0);
+
+             $self->{isAnalyse} = 2;
+         }
+         elsif ($self->{isAnalyseTrans})
+         {
+             # A "Traduction" label arms the translator capture.
+             if ($chunk =~ m/Traduction/i)
+             {
+                 $self->{isTranslator} = 1;
+             }
+
+             $self->{isAnalyseTrans} = 0;
+         }
+         elsif ($self->{isAuthor} eq 2)
+         {
+             my $known = $self->{curInfo}->{authors};
+             if ($chunk ne '')
+             {
+                 $self->{curInfo}->{authors} = $addName->($known, $chunk);
+             }
+             # Back to "inside the author block, waiting for the next link".
+             $self->{isAuthor} = 1;
+         }
+         elsif ($self->{isPublisher})
+         {
+             $self->{curInfo}->{publisher} = $chunk;
+             $self->{isPublisher} = 0;
+         }
+         elsif ($self->{isSerie})
+         {
+             $self->{curInfo}->{serie} = $chunk;
+             $self->{isSerie} = 0;
+         }
+         elsif ($self->{isPublication})
+         {
+             $self->{curInfo}->{publication} = $chunk;
+             $self->{isPublication} = 0;
+         }
+         elsif ($self->{isTranslator} eq 2)
+         {
+             $self->{curInfo}->{translator} = $chunk;
+             $self->{isTranslator} = 0;
+         }
+         elsif ($self->{isDescription})
+         {
+             # Skip the site's "no back-cover text" placeholder; every other
+             # chunk is appended as one line of the description.
+             if ($chunk !~ m/Pas de texte sur la quatri.me de couverture\./i)
+             {
+                 $self->{curInfo}->{description} .= $chunk ."\n";
+             }
+         }
+
+     }
+ }
+
+ # Constructor: builds the parent object, re-blesses it into the requested
+ # class and initialises the plugin's state.
+ #
+ # hasField marks the columns for which the result list carries a value
+ # (1) or not (0). The save* buffers start empty and every is* parsing
+ # flag starts at 0.
+ #
+ # Returns the new plugin instance.
+ sub new
+ {
+     my $invocant = shift;
+     my $class = ref($invocant) || $invocant;
+     my $self = $class->SUPER::new();
+     bless ($self, $class);
+
+     $self->{hasField} = {
+         title => 1,
+         authors => 1,
+         publication => 0,
+         format => 0,
+         edition => 1,
+         serie => 1,
+     };
+
+     # Buffers holding the title/author of the list entry being read.
+     foreach my $buffer (qw(saveTitle saveAuthor))
+     {
+         $self->{$buffer} = '';
+     }
+
+     # State flags driven by the start/end/text handlers.
+     foreach my $flag (qw(isFound isTitle isAuthor isPublisher isPublication
+                          isSerie isDescription isTranslator isAnalyseTrans
+                          isAnalyse))
+     {
+         $self->{$flag} = 0;
+     }
+
+     return $self;
+ }
+
+ # Rewrites the raw HTML before it is parsed.
+ #
+ # Result lists ($self->{parsingList} true) are returned untouched. On an
+ # item page, the text that follows the 'Id="R' marker (narrowed to the
+ # noocell_fs15 cell when present) is flattened to plain text and wrapped in
+ # <mytpf> marker elements. Everything from the marker onwards is replaced
+ # by that wrapped text. A few <b>/<p>/<br> sequences are then simplified.
+ #
+ # Parameters:
+ #   $self - plugin instance
+ #   $html - page source
+ # Returns the (possibly rewritten) page source.
+ sub preProcess
+ {
+     my ($self, $html) = @_;
+
+     if (!$self->{parsingList})
+     {
+         # The description may itself contain HTML tags, so it is located
+         # here, before the parser runs.
+         my $marker = 'Id="R';
+         my $found = index($html, $marker);
+         if ($found >= 0)
+         {
+             my $html2 = substr($html, $found + length($marker));
+
+             my $cellOpen = '<TD class="noocell_fs15" valign="top">';
+             my $cellAt = index($html2, $cellOpen);
+             $html2 = substr($html2, $cellAt + length($cellOpen))
+                 if ($cellAt >= 0);
+
+             my $cellEnd = index($html2, '</TD>');
+             $html2 = substr($html2, 0, $cellEnd)
+                 if ($cellEnd >= 0);
+
+             # Ordered list of [pattern, replacement]; applied top to bottom.
+             my @rewrites = (
+                 [ qr|<li>|i,    "\n* " ],
+                 [ qr|<br>|i,    "\n"   ],
+                 [ qr|<br />|i,  "\n"   ],
+                 [ qr|<b>|i,     ''     ],
+                 [ qr|</b>|i,    ''     ],
+                 [ qr|<i>|i,     ''     ],
+                 [ qr|</i>|i,    ''     ],
+                 [ qr|<p>|i,     "\n"   ],
+                 [ qr|</p>|i,    ''     ],
+                 [ qr|</h4>|i,   ''     ],
+                 [ qr|\x{92}|,   "'"    ],
+                 [ qr|’|i,       "'"    ],
+                 [ qr|•|i,       '*'    ],
+                 [ qr|œ|i,       'oe'   ],
+                 [ qr|…|i,       '...'  ],
+                 [ qr|\x{85}|i,  '...'  ],
+                 [ qr|\x{8C}|i,  'OE'   ],
+                 [ qr|\x{9C}|i,  'oe'   ],
+             );
+             foreach my $rewrite (@rewrites)
+             {
+                 my ($pattern, $replacement) = @{$rewrite};
+                 $html2 =~ s/$pattern/$replacement/g;
+             }
+
+             # Keep what precedes the marker, then the flattened text
+             # between a start and an end marker element.
+             $html = substr($html, 0, $found)
+                   . '><mytpf id="TPFSTARTCOMMENTTPF">'
+                   . $html2
+                   . '</mytpf><mytpf id="TPFENDCOMMENTTPF"></mytpf>';
+
+         }
+
+         $html =~ s{<b><p>}{}gmi;
+         $html =~ s{<br><br>}{<br>}gmi;
+         $html =~ s{<br><}{<}gmi;
+     }
+
+     return $html;
+ }
+
+ # Builds the nooSFere search URL for a search term.
+ #
+ # The term is sent as the "isbn" query parameter when
+ # $self->{searchField} is 'isbn', and as "titre" for any other field.
+ # $word is appended as received, without any escaping here.
+ #
+ # Parameters:
+ #   $self - plugin instance
+ #   $word - search term
+ # Returns the URL as a string.
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     my $criterion = ($self->{searchField} eq 'isbn') ? 'isbn' : 'titre';
+     my $base = "http://www.noosfere.org/icarus/livres/cyborg_livre.asp?mini=1000&maxi=3000&mode=Idem&EtOuParution=NS&";
+
+     return $base . $criterion . "=" . $word;
+ }
+
+ # Returns the URL to load for an item: the given $url when it is a true
+ # value, otherwise the site's home page.
+ sub getItemUrl
+ {
+     my ($self, $url) = @_;
+
+     return $url ? $url : 'http://www.noosfere.org/';
+ }
+
+ # Returns the plugin's name.
+ sub getName
+ {
+     my $name = 'nooSFere';
+     return $name;
+ }
+
+ # Returns the charset name this plugin reports: ISO-8859-15.
+ sub getCharset
+ {
+     my ($self) = @_;
+     return 'ISO-8859-15';
+ }
+
+ # Returns the plugin author's identifier.
+ sub getAuthor
+ {
+     my $author = 'TPF';
+     return $author;
+ }
+
+ # Returns the plugin's language code.
+ sub getLang
+ {
+     my $lang = 'FR';
+     return $lang;
+ }
+
+ # Returns a reference to the list of fields this plugin can search on,
+ # in order: 'isbn', then 'title'.
+ sub getSearchFieldsArray
+ {
+     my @fields = qw(isbn title);
+     return [@fields];
+ }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm b/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm new file mode 100644 index 0000000..54dd119 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm @@ -0,0 +1,303 @@ +package GCPlugins::GCbooks::GCSaraiva; + +################################################### +# +# Plugin for a brazilian bookstore named "Saraiva". +# Code written by Guilherme "nirev" Nogueira. +# guilherme at nirev dot org +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginSaraiva; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if (($tagname eq 'div') && ($attr->{class} eq 'hsliceLista')) + { + $self->{isResult} = 1; + $self->{linkCount} = 0; + $self->{itemIdx}++; + } + if (($tagname eq 'span') && ($attr->{class} eq 'entry-title')) + { + $self->{isTitle} = 1; + } + if (($tagname eq 'h2') && ($attr->{class} eq 'titulo_autor')) + { + $self->{isAuthor} = 1; + } + if (($tagname eq 'a') && $self->{isResult} && $self->{linkCount} == 0 ) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{linkCount}++; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'entry-content')) + { + $self->{isResult} = 0; + } + } + else + { + if (($tagname eq 'img') && ($attr->{id} eq 'imgProd')) + { + my $imgid = $attr->{src}; + $imgid =~ s/(.)*pro_id=//; + $imgid =~ s/&.*$//; + $self->{curInfo}->{cover} = 'http://www.livrariasaraiva.com.br/imagem/imagem.dll?tam=2&pro_id='.$imgid; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'aba1')) + { + $self->{isDescription} = 1; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'aba2')) + { + $self->{divInfo} = 1; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'produtosAbasMenus')) + { + $self->{divInfo} 
= 0; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'tituloprod')) + { + $self->{isTitle} = 1; + } + elsif (($tagname eq 'a') && ($attr->{href} eq 'javascript:PesquisaAutor();')) + { + $self->{isAuthor} = 1; + } + elsif (($tagname eq 'a') && ($attr->{href} eq 'javascript:PesquisaMarca();')) + { + $self->{isPublisher} = 1; + } + elsif (($tagname eq 'font')) + { + $self->{isAnalyse} = 1; + } + elsif (($tagname eq 'b') && $self->{divInfo} == 1) + { + $self->{isAnalyse} = 1; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + my $texto = $origtext; + $self->{itemsList}[$self->{itemIdx}]->{title} = $texto; + $self->{isTitle} = 0; + } + if ($self->{isAuthor}) + { + my $texto = $origtext; + $texto =~ s/<br>//; + my @dados = split(' / ', $texto); + $self->{itemsList}[$self->{itemIdx}]->{authors} = $dados[0]; + $self->{isAuthor} = 0; + } + } + else + { + if ($self->{isAuthor}) + { + my @authors = split(';', $origtext); + my $authors = ''; + my $tam = @authors; + my $count = 0; + for($count = 0; $count < $tam; $count++) + { + $authors[$count] =~ s/^\s*//gi; + $authors[$count] =~ s/\s*$//gi; + my @names = split(', ', $authors[$count]); + $authors .= ',' if ($count); + $authors .= $names[1].' 
'.$names[0]; + + } + $self->{curInfo}->{authors} = $authors; + $self->{isAuthor} = 0; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0; + } + elsif ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0; + } + elsif ($self->{isDescription}) + { + $self->{curInfo}->{description} = $origtext; + $self->{curInfo}->{description} =~ s/^\s*//; + $self->{curInfo}->{description} =~ s/\s+/ /; + $self->{isDescription} = 0; + } + elsif ($self->{isAnalyse}) + { + $self->{isISBN} = 1 if ($origtext =~ m/I\.S\.B\.N/i); + $self->{isFormat} = 1 if ($origtext =~ m/Acabamento/i); + $self->{isPublication} = 1 if ($origtext =~ m/Edição/i); + $self->{isPage} = 1 if ($origtext =~ m/Número de Paginas/i); + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isISBN}) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + elsif ($self->{isFormat}) + { + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + elsif ($self->{isPublication}) + { + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0; + } + elsif ($self->{isPage}) + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 0, + serie => 0, + }; + + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isAnalyse} = 0; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + $self->{isPage} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isDescription} = 0; + $self->{isResult} = 0; + $self->{linkCount} = 0; + $self->{divInfo} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + my $inicio_res = index($html,'<div id="esquerdaPesquisa" 
style="display:none;">esquerdaPesquisa</div>'); + if ( $inicio_res >= 0 ) + { + $html = substr($html, $inicio_res); + } + my $fim_res = index($html,'<div id="direitaPesquisa" style="display:none;">direitaPesquisa</div>'); + if ( $fim_res >= 0 ) + { + $html = substr($html, 0, $fim_res); + } + $html = '' if ($inicio_res < 0); + } + else + { + + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + $word =~ s|\s+|\+|; + + if ($self->{searchField} eq 'isbn') + { + return "http://www.livrariasaraiva.com.br/pesquisaweb/pesquisaweb.dll/pesquisa?ORDEMN2=E&ESTRUTN1=0301&PALAVRASN1=".$word; + } + else + { + return "http://www.livrariasaraiva.com.br/pesquisaweb/pesquisaweb.dll/pesquisa?ORDEMN2=E&ESTRUTN1=0301&PALAVRASN1=".$word; + } + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://www.livrariasaraiva.com.br".$url; + } + + sub getName + { + return "Saraiva"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-1"; + } + + sub getAuthor + { + return 'nirev'; + } + + sub getLang + { + return 'PT'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCbooksAdlibrisCommon.pm b/lib/gcstar/GCPlugins/GCbooks/GCbooksAdlibrisCommon.pm new file mode 100644 index 0000000..ee556dc --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCbooksAdlibrisCommon.pm @@ -0,0 +1,331 @@ +package GCPlugins::GCbooks::GCbooksAdlibrisCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCbooksAdlibrisPluginsBase; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ( (($tagname eq 'div') && ($attr->{class} eq 'productTitleFormat')) + || (($tagname eq 'a') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_linkProductTitle')) + ) + { + $self->{isFound} = 1 ; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl}; + } + elsif (($tagname eq 'a') && ($attr->{id} =~ m/_hlkTitle/i) && ($self->{isFound} eq '0')) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.adlibris.com/" . $self->{isLang} . "/" . 
$attr->{href}; + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} =~ m/ctl00_main_frame_ctrlsearchhit_rptSearchHit_ctl/i) && ($attr->{id} =~ m/_Label2/i) && ($self->{isFound} eq '0')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} =~ m/ctl00_main_frame_ctrlsearchhit_rptSearchHit_ctl/i) && ($attr->{id} =~ m/_Label4/i) && ($self->{isFound} eq '0')) + { + $self->{isFormat} = 1 ; + } + } + else + { + if (($tagname eq 'h1')) + { + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'li') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_liISBN13')) + { + $self->{isbnLevel} = 1 ; + } + elsif ($self->{isbnLevel} > 0) + { + if ($self->{isbnLevel} < 5) + { + $self->{isbnLevel}++ ; + } + else + { + $self->{isISBN} = 1 ; + $self->{isbnLevel} = 0 ; + } + } + elsif (($tagname eq 'a') && (($attr->{id} eq 'ctl00_main_frame_ctrlproduct_rptAuthor_ctl00_linkAuthor')) || ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_rptAuthor_ctl01_linkAuthor')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_linkPublisher')) + { + $self->{isPublisher} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblPublished')) + { + $self->{isPublication} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblPages')) + { + $self->{isPages} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblLanguage')) + { + $self->{isLanguage} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblFormat')) + { + $self->{isReliure} = 1 ; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'productDescription')) + { + $self->{isDescription} = 1 ; + } + elsif (($tagname eq 'img') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_imgProduct_ProductImageNotLinked') && !($attr->{src} =~ m/\/noimage./i)) + { + $self->{curInfo}->{cover} = $attr->{src} ; + } + + } + } 
+ + sub end + { + my ($self, $tagname) = @_; + + $self->{isFound} = 0 ; + $self->{inside}->{$tagname}--; + if (($self->{isDescription}) && ($tagname eq 'div')) + { + $self->{isDescription} = 0; + $self->{curInfo}->{description} =~ s/^Beskrivning://g ; + $self->{curInfo}->{description} =~ s/^Kuvaus://g ; + } + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + my @array = split(/&/,$origtext); + my $element; + foreach $element (@array) + { + my @nom_prenom = split(/,/,$element); + # Enleve les blancs en debut de chaine + $nom_prenom[0] =~ s/^\s+//; + $nom_prenom[1] =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $nom_prenom[0] =~ s/\s$+//; + $nom_prenom[1] =~ s/\s$+//; + if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " . $nom_prenom[1] ." " . 
$nom_prenom[0]; + } + } + + $self->{isAuthor} = 0 ; + } + elsif ($self->{isFormat}) + { + $self->{itemsList}[$self->{itemIdx}]->{format} = $origtext; + $self->{isFormat} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + $origtext =~ s/\s+$//; + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + $self->{curInfo}->{authors} .= $origtext; + $self->{curInfo}->{authors} .= ","; + $self->{isAuthor} = 0 ; + } + elsif ($self->{isISBN}) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{curInfo}->{isbn} =~ s/\s//g; + $self->{isISBN} = 0 ; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0 ; + } + elsif ($self->{isPublication}) + { + $self->{curInfo}->{publication} = $origtext; + $self->{curInfo}->{publication} =~ s/(\d\d\d\d)(\d\d)/01\/$2\/$1/g; + $self->{isPublication} = 0 ; + } + elsif ($self->{isPages}) + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPages} = 0 ; + } + elsif ($self->{isLanguage}) + { + $self->{curInfo}->{language} = $origtext; + $self->{isLanguage} = 0 ; + } + elsif ($self->{isReliure}) + { + $self->{curInfo}->{format} = $origtext; + $self->{isReliure} = 0 ; + } + elsif ($self->{isDescription}) + { + $self->{curInfo}->{description} .= $origtext ; + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 1, + edition => 0, + }; + + $self->{isLang} = 'se'; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isFormat} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isPublicationAndPages} = 0; + $self->{isLangAndReliure} = 0; + $self->{isDescription} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if 
($self->{parsingList}) + { + } + else + { + $html =~ s|<li>|\n* |g; + $html =~ s|<br>|\n|g; + $html =~ s|<br />|\n|g; + $html =~ s|<p>|\n|g; + $html =~ s|<b>||g; + $html =~ s|</b>||g; + $html =~ s|<i>||g; + $html =~ s|</i>||g; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + return "http://www.adlibris.com/" . $self->{isLang} . "/searchresult.aspx?isbn=" . $word. "&%3BfromProduct=true"; + } + else + { + return "http://www.adlibris.com/" . $self->{isLang} . "/searchresult.aspx?title=" . $word. "&%3BfromProduct=true"; + } + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return "Adlibris"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'SW'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCbooksAmazonCommon.pm b/lib/gcstar/GCPlugins/GCbooks/GCbooksAmazonCommon.pm new file mode 100644 index 0000000..bc75766 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCbooksAmazonCommon.pm @@ -0,0 +1,65 @@ +package GCPlugins::GCbooks::GCbooksAmazonCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksCommon; +use GCPlugins::GCstar::GCAmazonCommon; + +{ + package GCPlugins::GCbooks::GCbooksAmazonPluginsBase; + + use base ('GCPlugins::GCbooks::GCbooksPluginsBase', 'GCPlugins::GCstar::GCPluginAmazonCommon'); + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{codeField} = 'isbn'; + + return $self; + } + + sub isEAN + { + my ($self, $value) = @_; + + return $value =~ /^978/; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + + sub getEanField + { + return 'isbn'; + } +} + +1;
\ No newline at end of file diff --git a/lib/gcstar/GCPlugins/GCbooks/GCbooksCommon.pm b/lib/gcstar/GCPlugins/GCbooks/GCbooksCommon.pm new file mode 100644 index 0000000..a74e35a --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCbooksCommon.pm @@ -0,0 +1,61 @@ +package GCPlugins::GCbooks::GCbooksCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCPluginsBase; + +{ + package GCPlugins::GCbooks::GCbooksPluginsBase; + + use base qw(GCPluginParser); + use HTML::Entities; + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + $self->{searchType} = 'books'; + return $self; + } + + sub getSearchFieldsArray + { + return ['title']; + } + + sub getEanField + { + my $self = shift; + my $fields = $self->getSearchFieldsArray; + return 'isbn' + if $fields->[0] eq 'isbn'; + return undef; + } + +} + +1;
\ No newline at end of file diff --git a/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm b/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm new file mode 100644 index 0000000..457194a --- /dev/null +++ b/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm @@ -0,0 +1,398 @@ +package GCPlugins::GCcomics::GCbedetheque; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCcomics::GCcomicsCommon; + +{ + + package GCPlugins::GCcomics::GCPluginbedetheque; + + use LWP::Simple qw($ua); + + use base qw(GCPlugins::GCcomics::GCcomicsPluginsBase); + sub getSearchUrl + { + my ( $self, $word ) = @_; + if ($self->{searchField} eq 'series') + { + return "http://www.bedetheque.com/index.php?R=1&RechSerie=$word"; + } + elsif ($self->{searchField} eq 'writer') + { + return "http://www.bedetheque.com/index.php?R=1&RechAuteur=$word"; + } + else + { + return ''; + } + + #return "http://www.bedetheque.com/index.php?R=1&RechTexte=$word"; + } + + sub getSearchFieldsArray + { + return ['series', 'writer']; + } + + sub getItemUrl + { + my ( $self, $url ) = @_; + my @array = split( /#/, $url ); + $self->{site_internal_id} = $array[1]; + + 
return $url if $url =~ /^http:/; + return "http://www.bedetheque.com/" . $url; + } + + sub getNumberPasses + { + return 1; + } + + sub getName + { + return "Bedetheque"; + } + + sub getAuthor + { + return 'Mckmonster'; + } + + sub getLang + { + return 'FR'; + } + + sub getSearchCharset + { + my $self = shift; + + # Need urls to be double character encoded + return "utf8"; + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless( $self, $class ); + + $self->{hasField} = { + series => 1, + title => 1, + volume => 1, + }; + + $self->{isResultsTable} = 0; + $self->{isCover} = 0; + $self->{itemIdx} = 0; + $self->{last_cover} = ""; + $self->{site_internal_id} = ""; + $self->{serie} = ""; + $self->{synopsis} = ""; + $self->{current_field} = ""; + + return $self; + } + + sub preProcess + { + my ( $self, $html ) = @_; + + $self->{parsingEnded} = 0; + $html =~ s/\s+/ /g; + $html =~ s/\r?\n//g; + + if ( $self->{parsingList} ) + { + if ( $html =~ m/(\d+\salbum\(s\).+)/ ) { + + #keep only albums, no series or objects + $html = $1; + $self->{alternative} = 0; + } elsif ( $html =~ m/(<div id="albums_table">.+)/ ) { + $html = $1; + $self->{alternative} = 1; + } + } + else + { + $html =~ m/(<div class="box main reeditions">.+)/; + + #$html =~ m/(<div class="album.+)/; + $html = $1; + $self->{isResultsTable} = 0; + $self->{parsingEnded} = 0; + $self->{isCover} = 0; + $self->{itemIdx}++;; + } + + return $html; + } + + sub start + { + my ( $self, $tagname, $attr, $attrseq, $origtext ) = @_; + + return if ( $self->{parsingEnded} ); + + if ( $self->{parsingList} ) + { + if ( !defined ($self->{alternative}) || (!$self->{alternative}) ) + { + if ( ( $tagname eq "a" ) && ( $attr->{href} =~ m/album-/ ) ) + { + $self->{isCollection} = 1; + $self->{itemIdx}++; + + my $searchUrl = substr($attr->{href},0,index($attr->{href},".")).substr($attr->{href},index($attr->{href},".")); + $self->{itemsList}[$self->{itemIdx}]->{url} = 
$searchUrl; + $self->{itemsList}[$self->{itemIdx}]->{title} = $attr->{title}; + + #$self->{itemsList}[ $self->{itemIdx} ]->{url} = + # "http://www.bedetheque.com/" . $attr->{href}; + } + elsif ( $tagname eq "i" ) + { + $self->{isSerie} = 1; + } + } else { + if ( ( $tagname eq "table" ) && ( $attr->{id} eq "albums_serie" ) ) { + $self->{inTable} = 1; + } + elsif ( ($self->{inTable}) && ( $tagname eq "td" ) && ( $attr->{class} eq "num" ) ) { + $self->{itemIdx}++; + $self->{isVolume} = 1; + } + elsif ( ($self->{inTable}) && ( $tagname eq "a" ) && ( $attr->{href} =~ m/serie-/ ) ) { + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{isTitle} = 1; + } + elsif ( ( $self->{isSynopsis} ) && ( $tagname eq "br" ) && ( $self->{startSynopsis} ) ) { + + # This is a stop! for br ;-) and complementary of the p in the end section + # should be ( ( $tagname eq "p" ) || ( $tagname eq "br" ) ) + $self->{isSynopsis} = 0; + $self->{startSynopsis} = 0; + $self->{parsingEnded} = 1; + } + } + } + else + { + if ( $tagname eq "title") + { + $self->{isIssue} = 1; + $self->{isTitle} = 1; + } + + if ( ( $self->{isCover} == 0 ) && ( $tagname eq "a" ) && ( $attr->{href} =~ m/Couvertures\/.*\.[jJ][pP][gG]/ ) ) + { + $self->{curInfo}->{image} = 'http://www.bedetheque.com/' . 
$attr->{href}; + $self->{isCover} = 1; + } + elsif ( ( $tagname eq "div") && ( $attr->{class} eq "titre" ) ) { + $self->{isVolume} = 1; + } + elsif ( ( $tagname eq "ul") && ( $attr->{class} eq "infos" ) ) { + $self->{isResultsTable} = 1; + } + elsif ( ( $self->{isResultsTable} ) && ( $tagname eq "label" ) ) { + $self->{current_field} = ''; + $self->{openlabel} = 1; + } + elsif ( ( $tagname eq "div" ) && ( $attr->{class} eq "title" ) && ( !defined( $self->{curInfo}->{title} ) || ( $self->{curInfo}->{title} =~ /^$/ ) ) ) { + $self->{isTitle} = 1; + } + elsif ( ( $tagname eq "span" ) && ( $attr->{class} eq "type" ) ) { + $self->{isSerie} = 1; + } + elsif ( $tagname eq "em" ) { + $self->{isSynopsis} = 1; + } + elsif ( ( $tagname eq "a" ) && ( $attr->{class} eq "titre eo" ) ) { + if ( $attr->{title} =~ m/.+\s-(\d+)-\s.+/ ) { + $self->{curInfo}->{volume} = $1; + } + } + } + } + + sub text + { + my ( $self, $origtext ) = @_; + + return if ( $origtext eq " " ); + + return if ( $self->{parsingEnded} ); + + if ( $self->{parsingList} ) + { + if ( !defined ($self->{alternative}) || (!$self->{alternative}) ) { + if ( $self->{isSerie} == 1) + { + $self->{itemsList}[ $self->{itemIdx} ]->{series} = $origtext; + $self->{isSerie} = 0; + } + else + { + if ($self->{isCollection} == 1) + { + + #sometimes the field is "-vol-title", sometimes "--vol-title" + $origtext =~ s/-+/-/; + if ( $origtext =~ m/(.+)\s-(\d+)-\s(.+)/ ) { + $self->{itemsList}[ $self->{itemIdx} ]->{series} = $1; + $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $2; + } elsif ( $origtext =~ /-/ ){ + my @fields = split( /-/, $origtext ); + $self->{itemsList}[ $self->{itemIdx} ]->{series} = $fields[0]; + $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $fields[1]; + } + $self->{isCollection} = 0; + } + } + } else { + if ( ( $self->{inTable} ) && ( $self->{isTitle} ) ) { + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext; + } elsif ( ( $self->{inTable} ) && ( $self->{isVolume} ) ) { + $self->{itemsList}[ 
$self->{itemIdx} ]->{volume} = $origtext; + } + } + } + else + { + if ( $self->{isResultsTable} == 1 ) + { + $origtext=~s/:\s+/:/; + my %td_fields_map = ( + "Identifiant :" => '', + "Scénario :" => 'writer', + "Dessin :" => 'illustrator', + "Couleurs :" => 'colourist', + "Dépot légal :" => 'publishdate', + "Achevé impr. :" => 'printdate ', + "Estimation :" => 'cost', + "Editeur :" => 'publisher', + "Collection : " => 'collection', + "Taille :" => 'format', + "ISBN :" => 'isbn', + "Planches :" => 'numberboards' + ); + + if ( ( $self->{openlabel} ) && ( exists $td_fields_map{$origtext} ) ) { + $self->{current_field} = $td_fields_map{$origtext}; + } + elsif ( defined ( $self->{current_field} ) && ( $self->{current_field} !~ /^$/ ) ) + { + $origtext=~s/ / /g; + $origtext=~s/\s+$//g; + $self->{curInfo}->{$self->{current_field}} = $origtext; + $self->{current_field} = ""; + } + } + elsif ( $self->{isVolume} ) + { + $self->{curInfo}->{volume} = $origtext; + $self->{isVolume} = 0 ; + } + + if ( $self->{isTitle} ) + { + $self->{curInfo}->{title} = $origtext; + } + elsif ( $self->{isSerie} ) { + $self->{curInfo}->{series} = $origtext; + $self->{curInfo}->{series} =~s/^\s+//; + } + elsif ( ( $self->{isSynopsis} ) && ( ( $origtext =~ /Résumé de l'album :/ ) || ( $origtext =~ /Résumé de la série :/ ) ) ) { + $self->{startSynopsis} = 1; + } + elsif ( ( $self->{isSynopsis} ) && ( $self->{startSynopsis} ) ) { + $self->{curInfo}->{synopsis} .= " ".$origtext; + $self->{curInfo}->{synopsis} =~ s/^(\s)*//; + $self->{curInfo}->{synopsis} =~ s/(\s)*$//; + } + } + } + + sub end + { + my ( $self, $tagname ) = @_; + + return if ( $self->{parsingEnded} ); + + if ( $self->{parsingList} ) + { + if ( !defined ($self->{alternative}) || (!$self->{alternative}) ) { + if ( ( $tagname eq "i" ) && $self->{isCollection} == 1) + { + + #end of collection, next field is title + $self->{isTitle} = 1; + $self->{isCollection} = 0; + } + } else { + if ( ( $self->{inTable} ) && ( $tagname eq "a" ) ) { + 
$self->{isTitle} = 0; + } elsif ( ( $self->{inTable} ) && ( $tagname eq "td" ) ) { + $self->{isVolume} = 0; + } + } + } + else + { + if ( ( $tagname eq "ul" ) && $self->{isResultsTable} == 1 ) + { + $self->{isIssue} = 0; + $self->{isResultsTable} = 0; + } + elsif ( ( $self->{isResultsTable} ) && ( $tagname eq "label" ) ) { + $self->{openlabel} = 0; + } + elsif ( ( $self->{isTitle} ) && ( ( $tagname eq "div" ) || ( $tagname eq "h1" ) ) ) { + $self->{isTitle} = 0; + } + elsif ( ( $self->{isSerie} ) && ( $tagname eq "a" ) ) { + $self->{isSerie} = 0; + } + elsif ( ( $self->{isSynopsis} ) && ( $tagname eq "em" ) && ( !$self->{startSynopsis} ) ) { + $self->{isSynopsis} = 0; + $self->{startSynopsis} = 0; + } + elsif ( ( $self->{isSynopsis} ) && ( ( $tagname eq "p" ) || ( $tagname eq "br" ) ) && ( $self->{startSynopsis} ) ) { + $self->{isSynopsis} = 0; + $self->{startSynopsis} = 0; + $self->{parsingEnded} = 1; + } + } + } +} + +1;
# \ No newline at end of file
# diff --git a/lib/gcstar/GCPlugins/GCcomics/GCcomicbookdb.pm b/lib/gcstar/GCPlugins/GCcomics/GCcomicbookdb.pm
# new file mode 100644
# index 0000000..80b299b
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCcomics/GCcomicbookdb.pm
# @@ -0,0 +1,546 @@
package GCPlugins::GCcomics::GCcomicbookdb;

###################################################
#
#  Copyright 2005-2012 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCcomics::GCcomicsCommon;

{

    # Two-pass scraper for comicbookdb.com: pass 1 lists matching series,
    # pass 2 lists the issues of the chosen series; the item page is then
    # parsed into $self->{curInfo}.
    package GCPlugins::GCcomics::GCPlugincomicbookdb;

    use LWP::Simple qw($ua);
    use HTTP::Cookies;

    use base qw(GCPlugins::GCcomics::GCcomicsPluginsBase);

    # Base URL prepended to the relative links found in the pages.
    my $SITE_ROOT = "http://www.comicbookdb.com/";

    # Opening-tag callback.
    #   $tagname - lower-case tag name
    #   $attr    - hash ref of the tag's attributes
    # Only sets state flags and result URLs; the matching text is picked up
    # later by text().
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        if ($self->{pass} == 1)
        {
            # First pass, searching for series name
            if ($tagname eq "h2")
            {
                $self->{isAtResults} = 1;
            }
            if (   ($tagname eq "a")
                && ($self->{isAtResults})
                && ($attr->{href} !~ m/ebay\.com/))
            {
                $self->{isCollection} = 1;
                $self->{itemIdx}++;

                $self->{itemsList}[ $self->{itemIdx} ]->{nextUrl} =
                  $SITE_ROOT . $attr->{href};
            }
        }
        else
        {
            # Second pass, or fetching item info
            if ($self->{parsingList})
            {

                if (   ($tagname eq "tbody")
                    && ($self->{isResultsTable})
                    && ($self->{isSpecialIssue} == 1))
                {
                    $self->{isSpecialIssue} = 2;
                }

                # Parsing issue list
                if (($tagname eq "a") && ($self->{isResultsTable}))
                {
                    if ($attr->{href} =~ m/javascript/)
                    {
                        # Multiple editions of the one issue, need to be
                        # handled differently
                        $self->{isSpecialIssue} = 1;
                    }
                    elsif ($attr->{href} =~ m/storyarc\.php/)
                    {
                        # Prevent story arcs from populating lists
                    }
                    elsif ($self->{isSpecialIssue} == 1)
                    {
                        $self->{resultsTableColumn}++;
                        if ($self->{resultsTableColumn} == 1)
                        {
                            $self->{isSpecialIssueNo} = 1;
                            $self->{isIssue}          = 1;
                            $self->{itemIdx}++;
                            $self->{itemsList}[ $self->{itemIdx} ]->{url} =
                              $SITE_ROOT . $attr->{href};
                        }
                        elsif ($self->{resultsTableColumn} == 2)
                        {
                            $self->{isTitle}        = 1;
                            $self->{isSpecialTitle} = 1;
                        }
                    }
                    elsif ($self->{isSpecialIssue} == 2)
                    {
                        $self->{itemIdx}++;
                        $self->{itemsList}[ $self->{itemIdx} ]->{url} =
                          $SITE_ROOT . $attr->{href};
                        $self->{isTitle} = 1;
                    }
                    else
                    {
                        $self->{resultsTableColumn}++;
                        if ($self->{resultsTableColumn} == 1)
                        {
                            $self->{isIssue} = 1;
                            $self->{itemIdx}++;
                            $self->{itemsList}[ $self->{itemIdx} ]->{url} =
                              $SITE_ROOT . $attr->{href};
                        }
                        elsif ($self->{resultsTableColumn} == 2)
                        {
                            $self->{isTitle} = 1;
                        }
                    }
                }
            }
            else
            {
                # Fetching item info
                if (   ($tagname eq "span")
                    && ((index $attr->{class}, "page_headline") > -1))
                {
                    $self->{insideHeadline} = 1;
                }
                elsif (($tagname eq "a")
                    && ($self->{insideHeadline})
                    && ($attr->{href} =~ m/title\.php/))
                {
                    $self->{insideName} = 1;
                }
                elsif (($tagname eq "a")
                    && ($self->{insideHeadline})
                    && ($attr->{href} =~ m/issue_number\.php/))
                {
                    $self->{insideNumber} = 1;
                }
                elsif (($tagname eq "a") && ($self->{nextisWriters}))
                {
                    $self->{insideWriters}    = 1;
                    $self->{insidePencillers} = 0;
                    $self->{insideColorists}  = 0;
                }
                elsif (($tagname eq "a") && ($self->{nextisPencillers}))
                {
                    $self->{insideWriters}    = 0;
                    $self->{insidePencillers} = 1;
                    $self->{insideColorists}  = 0;
                }
                elsif (($tagname eq "a") && ($self->{nextisColorists}))
                {
                    $self->{insideWriters}    = 0;
                    $self->{insidePencillers} = 0;
                    $self->{insideColorists}  = 1;
                }
                elsif (($tagname eq "a") && ($attr->{href} =~ /imprint\.php/))
                {
                    $self->{insidePublisher} = 1;
                }
                elsif (($tagname eq "a")
                    && ($attr->{href} =~ /publisher\.php/)
                    && (!$self->{curInfo}->{publisher}))
                {
                    # Only used when no imprint link already set the publisher
                    $self->{insidePublisher} = 1;
                }
                elsif (($tagname eq "a") && ($attr->{href} =~ /coverdate\.php/))
                {
                    $self->{insideCoverDate} = 1;
                }

                # Separate chain: sub-headline and cover image detection
                if (   ($tagname eq "span")
                    && ((index $attr->{class}, "test") > -1)
                    && ((index $attr->{class}, "page_subheadline") > -1))
                {
                    $self->{insideSubHeadline} = 1;
                }
                elsif (($tagname eq "a")
                    && ($attr->{href} =~ /^graphics\/comic_graphics\//))
                {
                    $self->{curInfo}->{image} = $SITE_ROOT . $attr->{href};
                }
                elsif (($tagname eq "img")
                    && ($attr->{src} =~ /^graphics\/comic_graphics\//)
                    && (!$self->{curInfo}->{image}))
                {
                    # Fall back to the inline image when no link to the
                    # full-size cover was seen
                    $self->{curInfo}->{image} = $SITE_ROOT . $attr->{src};
                }

            }
        }
    }

    # Closing-tag callback: resets the state flags that are scoped to the
    # element being closed.
    sub end
    {
        my ($self, $tagname) = @_;
        $self->{inside}->{$tagname}--;

        if ($self->{isResultsTable})
        {
            if ($tagname eq "table")
            {
                $self->{isResultsTable} = 0;
            }
            elsif ($tagname eq "tr")
            {
                $self->{resultsTableColumn} = 0;
            }
        }

        if ($tagname eq "tbody")
        {
            $self->{isSpecialIssue} = 0;
        }
        elsif ($tagname eq "span")
        {
            $self->{insideHeadline}    = 0;
            $self->{insideSubHeadline} = 0;
            $self->{insideNumber}      = 0;
        }
        elsif ($tagname eq "td")
        {
            $self->{isAtResults}      = 0;
            $self->{nextisWriters}    = 0;
            $self->{nextisPencillers} = 0;
            $self->{nextisColorists}  = 0;
            $self->{insideWriters}    = 0;
            $self->{insidePencillers} = 0;
            $self->{insideColorists}  = 0;
        }
        elsif ($tagname eq "a")
        {
            $self->{insidePublisher} = 0;
            $self->{insideCoverDate} = 0;
        }
    }

    # Private helper: stores $name in $info->{$field}, or appends it after
    # ", " when the field is already set and does not contain $name yet.
    sub _appendCredit
    {
        my ($info, $field, $name) = @_;

        if (!defined($info->{$field}) || ($info->{$field} eq ""))
        {
            $info->{$field} = $name;
        }
        elsif ((index $info->{$field}, $name) == -1)
        {
            $info->{$field} .= ", " . $name;
        }
        return;
    }

    # Text callback: stores $origtext into the list entry or into
    # $self->{curInfo}, according to the flags set by start().
    sub text
    {
        my ($self, $origtext) = @_;

        return if ($origtext eq " ");

        return if ($self->{parsingEnded});

        if ($self->{parsingList})
        {
            if ($self->{isCollection})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{series} = $origtext;
                $self->{isCollection} = 0;
            }
            if ($origtext eq "Cover Date")
            {
                # Column header of the issue table: links seen from now on
                # are issues
                $self->{isResultsTable} = 1;
            }
            if ($self->{isIssue})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $origtext;
                $self->{isIssue} = 0;
            }
            if ($self->{isSpecialIssueNo})
            {
                $self->{specialIssueNo}   = $origtext;
                $self->{isSpecialIssueNo} = 0;
            }
            if ($self->{isTitle})
            {
                if ($self->{isSpecialIssue} == 2)
                {
                    # Edition row of a multi-edition issue: reuse the number
                    # and title prefix remembered from the parent row
                    $self->{itemsList}[ $self->{itemIdx} ]->{volume} =
                      $self->{specialIssueNo};
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} =
                      $self->{specialTitle} . $origtext;
                }
                else
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                }
                $self->{isTitle} = 0;
            }
            if ($self->{isSpecialTitle})
            {
                $self->{specialTitle}   = $origtext;
                $self->{isSpecialTitle} = 0;
            }
        }
        else
        {
            if ($self->{insideName})
            {
                $self->{curInfo}->{series} = $origtext;
                #$self->{curInfo}->{series} =~ s/(\s\([0-9]*\))$//;
                $self->{insideName} = 0;
            }
            elsif (($self->{insideNumber}) && ($origtext =~ /^\s*#(\d+)/))
            {
                # volume where #XX is in <A HREF... tag, '-' is not
                $self->{curInfo}->{volume} = $1;
                $self->{insideNumber} = 0;
            }
            elsif (($self->{insideHeadline}) && ($origtext =~ /-\s#(\d+)/))
            {
                # volume where #XX isn't in <A HREF... tag
                $self->{curInfo}->{volume} = $1;
                $self->{insideNumber} = 0;
            }
            elsif (($self->{insideHeadline}) && ($origtext =~ /-\s*TPB/))
            {
                # Trade paperback
                $self->{curInfo}->{series} .= " TPB";

                # Get volume number. Default to 1.
                if ($origtext =~ /vol\. (\d+)/)
                {
                    $self->{curInfo}->{volume} = $1;
                }
                else
                {
                    $self->{curInfo}->{volume} = 1;
                }
                $self->{insideNumber} = 0;
            }
            elsif (($self->{insideHeadline}) && ($origtext =~ /vol\. (\d+)/))
            {
                $self->{curInfo}->{volume} = $1;
                $self->{insideNumber} = 0;
            }
            elsif (($self->{insideHeadline}) && ($origtext =~ /-\s*Annual\s*(\d+)/))
            {
                # Annual volume where #XX isn't in <A HREF... tag
                $self->{curInfo}->{volume} = $1;
                $self->{curInfo}->{series} .= " Annual";
                $self->{insideNumber} = 0;
            }
            elsif (($self->{insideSubHeadline}) && ($origtext =~ /\"(.*)\"/))
            {
                $self->{curInfo}->{title} = $1;

                # Get printing or other note if present
                if ($origtext =~ /\((.*)\)/)
                {
                    $self->{curInfo}->{title} .= " (" . $1 . ")";
                }
            }
            elsif ($self->{insidePublisher})
            {
                $self->{curInfo}->{publisher} = $origtext;
                $self->{insidePublisher} = 0;
            }
            elsif ($origtext eq "Writer(s):")
            {
                $self->{nextisWriters}    = 1;
                $self->{nextisPencillers} = 0;
                $self->{nextisColorists}  = 0;
            }
            elsif ($origtext eq "Penciller(s):")
            {
                $self->{nextisWriters}    = 0;
                $self->{nextisPencillers} = 1;
                $self->{nextisColorists}  = 0;
            }
            elsif ($origtext eq "Colorist(s):")
            {
                $self->{nextisWriters}    = 0;
                $self->{nextisPencillers} = 0;
                $self->{nextisColorists}  = 1;
            }
            elsif (($origtext eq "Letterer(s):")
                || ($origtext eq "Inker(s):")
                || ($origtext eq "Editor(s):")
                || ($origtext eq "Cover Artist(s):")
                || ($origtext eq "Characters:")
                || ($origtext eq "Groups:"))
            {
                # Sections that are not imported: stop collecting credits
                $self->{nextisWriters}    = 0;
                $self->{nextisPencillers} = 0;
                $self->{nextisColorists}  = 0;
            }
            elsif ($self->{insideWriters})
            {
                _appendCredit($self->{curInfo}, 'writer', $origtext);
                $self->{insideWriters} = 0;
            }
            elsif ($self->{insidePencillers})
            {
                _appendCredit($self->{curInfo}, 'illustrator', $origtext);
                $self->{insidePencillers} = 0;
            }
            elsif ($self->{insideColorists})
            {
                _appendCredit($self->{curInfo}, 'colourist', $origtext);
                $self->{insideColorists} = 0;
            }
            elsif ($origtext eq "Synopsis: ")
            {
                $self->{nextisSynopsis} = 1;
            }
            elsif ($self->{nextisSynopsis})
            {
                if ($origtext !~ /None entered\./)
                {
                    $self->{curInfo}->{synopsis} = $origtext;
                    $self->{curInfo}->{synopsis} =~ s/^(\s)*//;
                    $self->{curInfo}->{synopsis} =~ s/(\s)*$//;
                }
                $self->{nextisSynopsis} = 0;
            }
            elsif ($self->{insideCoverDate})
            {
                $self->{curInfo}->{printdate} = $origtext;
                $self->{curInfo}->{printdate} =~ s/^(\s)*//;

                # Translate date string to date
                $self->{curInfo}->{printdate} =
                  GCUtils::strToTime($self->{curInfo}->{printdate}, "%B %Y");
                $self->{curInfo}->{publishdate} = $self->{curInfo}->{printdate};
            }
        }
    }

    # Constructor: builds the base parser, attaches a cookie jar to its user
    # agent and initialises the list-parsing state.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();

        $self->{ua}->cookie_jar(HTTP::Cookies->new);

        bless($self, $class);

        $self->{isResultsTable}     = 0;
        $self->{itemIdx}            = 0;
        $self->{resultsTableColumn} = 0;
        $self->{curName}            = undef;
        $self->{curUrl}             = undef;

        return $self;
    }

    # Sets $self->{hasField} to the columns available in the result list of
    # the current pass (series names in pass 1, title/volume afterwards).
    sub getReturnedFields
    {
        my $self = shift;

        if ($self->{pass} == 1)
        {
            $self->{hasField} = {series => 1,};
        }
        else
        {
            $self->{hasField} = {
                title  => 1,
                volume => 1,
            };
        }
    }

    # Called with the fetched page before parsing; re-arms the parser and
    # returns the HTML unchanged.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;

        return $html;
    }

    # Returns the search URL for $word (already URL-escaped by the caller,
    # as the %28/%29 pattern below shows).
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        $word =~ s/\+%28\d{4}%29$//;    # strip year from end of $word (title)

        # Grab the home page first, or the pages fetched are blank
        # (who knows why... must be something funky with the website)
        $ua->get('http://www.comicbookdb.com/');

        return
          "http://www.comicbookdb.com/search.php?form_search=$word&form_searchtype=Title";
    }

    # Returns $url unchanged when it is already absolute (http or https),
    # otherwise prefixes it with the site host.
    sub getItemUrl
    {
        my ($self, $url) = @_;
        return $url if $url =~ /^https?:/;

        return "http://www.comicbookdb.com" . $url;
    }

    # Number of list passes needed before reaching an item page.
    sub getNumberPasses
    {
        return 2;
    }

    # Display name of the plugin.
    sub getName
    {
        return "Comic Book DB";
    }

    # Plugin author.
    sub getAuthor
    {
        return 'Zombiepig';
    }

    # Language of the site contents.
    sub getLang
    {
        return 'EN';
    }
}

1;
# diff --git a/lib/gcstar/GCPlugins/GCcomics/GCcomicsCommon.pm b/lib/gcstar/GCPlugins/GCcomics/GCcomicsCommon.pm
# new file mode 100644
# index 0000000..3b1229c
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCcomics/GCcomicsCommon.pm
# @@ -0,0 +1,49 @@
package GCPlugins::GCcomics::GCcomicsCommon;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCPluginsBase;

{
    # Common parent of the comics plugins; it only adds the list of search
    # fields on top of GCPluginParser.
    package GCPlugins::GCcomics::GCcomicsPluginsBase;

    use base qw(GCPluginParser);

    # Constructor: delegates to the parent constructor and blesses the
    # result into the invoking class (or the class of the invoking object).
    sub new
    {
        my $invocant = shift;
        my $package  = ref($invocant) || $invocant;
        my $parser   = $package->SUPER::new();

        return bless($parser, $package);
    }

    # Returns an array ref naming the fields a search can be run on.
    sub getSearchFieldsArray
    {
        my @searchable = ('series');

        return \@searchable;
    }
}

1;
# diff --git a/lib/gcstar/GCPlugins/GCcomics/GCmangasanctuary.pm b/lib/gcstar/GCPlugins/GCcomics/GCmangasanctuary.pm
# new file mode 100644
# index 0000000..d05d0c8
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCcomics/GCmangasanctuary.pm
# @@ -0,0 +1,503 @@
package GCPlugins::GCcomics::GCmangasanctuary;

###################################################
#
#  Copyright 2005-2007 Tian
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCcomics::GCcomicsCommon;

{

    # Scraper for manga-sanctuary.com. The result list is the "Volumes"
    # section of the search page; the item page is completed with the staff
    # section of the series page (see preProcess).
    package GCPlugins::GCcomics::GCPluginmangasanctuary;

    use LWP::Simple qw($ua);

    use base qw(GCPlugins::GCcomics::GCcomicsPluginsBase);

    # Private helper: returns $text without leading/trailing whitespace.
    # Works on multi-line text nodes too (the previous inline regex had no
    # /s flag, failed on them and left a stale $1 behind).
    sub _trimmed
    {
        my ($text) = @_;

        return '' if !defined $text;
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
        return $text;
    }

    # Opening-tag callback.
    #   $tagname - lower-case tag name
    #   $attr    - hash ref of the tag's attributes
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
        if ($self->{parsingList})    # results page
        {

            #The interesting part to parse looks like this :
            #<li class="row1"><a href="/manhwa-rebirth-vol-2-simple-s1397-p682.html">Rebirth #2</a> <span>Manhwa</span></li>
            if ($tagname eq "a")
            {
                $self->{isDebut} = 1;
                $self->{itemIdx}++;
                $self->{itemsList}[ $self->{itemIdx} ]->{url} = "http://www.manga-sanctuary.com" . $attr->{href};

                # Type and format are encoded in the link; only store them
                # when the link really has the expected shape.
                if ($attr->{href} =~ m/\/(.*?)-.*-vol-\d+-(.*?)-s\d+-p\d+\.html/)
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{type}   = $1;
                    $self->{itemsList}[ $self->{itemIdx} ]->{format} = $2;
                }
            }
        }
        else    # item page
        {

            #Start by fetching the cover image
            #<a target="_blank" href="/couvertures/big/rebirth1gd.jpg"><img src="/couvertures/rebirth1gd.jpg"></a>
            if (($tagname eq "a") && ($attr->{href} =~ m/couvertures.*\.[jJ][pP][gG]/))
            {
                my $response = $ua->get("http://www.manga-sanctuary.com" . $attr->{href});
                if ($response->content_type =~ m/text\/html/)    # the big image does not exist
                {
                    $self->{downloadThumbnail} = 1;
                }
                else    # the big image exists
                {
                    $self->{curInfo}->{image} = "http://www.manga-sanctuary.com" . $attr->{href};
                }
            }
            if (($tagname eq "img") && ($attr->{src} =~ m/couvertures.*\.[jJ][pP][gG]/) && ($self->{downloadThumbnail} == 1))
            {
                $self->{curInfo}->{image} = "http://www.manga-sanctuary.com" . $attr->{src};
                $self->{downloadThumbnail} = 0;
            }

            #Generic detection of the dt / dd / h3 / p / a tags
            if ($tagname eq "dt")
            {
                $self->{tagDTdetected} = 1;
            }
            elsif ($tagname eq "dd")
            {
                $self->{tagDDdetected} = 1;
            }
            elsif ($tagname eq "h3")
            {
                $self->{tagH3detected} = 1;
            }
            elsif ($tagname eq "p")
            {
                $self->{tagPdetected} = 1;
            }
            elsif ($tagname eq "a")
            {
                $self->{tagAdetected} = 1;
            }

            #Tell the kinds of title apart (original / French): the flag
            #image numbered 77 marks the French title.
            if (($tagname eq "img") && ($attr->{src} =~ m/\/design\/img\/flags/) && ($self->{titleDetected} == 1))
            {
                my ($flagId) = $attr->{src} =~ m/\/(\d*)\.png$/;
                if (defined($flagId) && ($flagId ne '') && ($flagId == 77))
                {
                    $self->{titreFrancais} = 1;
                }
                else
                {
                    $self->{titreFrancais} = 0;
                }
            }

            #Fetch the rating
            #<ul id="notation">\nStaff MS:<img src="/design/img/9.gif" title="8.5/10"/></ul>
            if (($tagname eq "ul") && ($attr->{id} =~ m/notation/))
            {
                $self->{notationDetected} = 1;
            }
            elsif (($tagname eq "img") && ($self->{notationDetected} == 1))
            {
                if ($attr->{title} =~ m/^(\d*\.?\d*)\/10/)
                {
                    $self->{curInfo}->{rating} = $1;
                }
                $self->{notationDetected} = 0;

                #Fetch the format from the page address.
                #http://www.manga-sanctuary.com/manga-duds-hunt-vol-1-simple-s1169-p1477.html
                #Could be done as soon as webPage is known; placed here to make sure it only runs once.
                if ($self->{curInfo}->{webPage} =~ m/vol-\d+-(.*?)-s\d+-p\d+\.html/)
                {
                    $self->{curInfo}->{format} = $1;
                }
            }
        }
    }

    # Closing-tag callback: clears the tag flags and, on </dd> and </p>,
    # the field flags so that an empty field does not capture later text.
    sub end
    {
        my ($self, $tagname) = @_;
        if ($self->{parsingList})    # results page
        {
            if (($tagname eq "a") && $self->{isFin} == 1)
            {
                #end of collection, next field is title
                $self->{isFin} = 0;
            }
        }
        else    # item page
        {
            #Generic detection of the dt / dd tags
            if ($tagname eq "dt")
            {
                $self->{tagDTdetected} = 0;
            }
            elsif ($tagname eq "dd")
            {
                $self->{tagDDdetected} = 0;
                #Reset in case the field was empty
                $self->{titleDetected}       = 0;
                $self->{titreFrancais}       = 1;
                $self->{publisherDetected}   = 0;
                $self->{collectionDetected}  = 0;
                $self->{publishdateDetected} = 0;
                $self->{costDetected}        = 0;
                $self->{typeDetected}        = 0;
                $self->{categoryDetected}    = 0;
                $self->{genresDetected}      = 0;
                $self->{scenaristeDetected}  = 0;
                $self->{dessinateurDetected} = 0;
            }
            elsif ($tagname eq "div")    #The content belonging to an h3 title comes after the </h3> tag, so </h3> cannot be used here.
            {
                $self->{tagH3detected} = 0;
            }
            elsif ($tagname eq "p")
            {
                $self->{tagPdetected} = 0;
                #Reset in case the field was empty
                $self->{synopsisDetected}  = 0;
                $self->{critiquesDetected} = 0;
                $self->{reactionsDetected} = 0;
            }
            elsif ($tagname eq "a")
            {
                $self->{tagAdetected} = 0;
            }
            elsif ($tagname eq "ul")
            {
                $self->{notationDetected} = 0;
            }
        }
    }

    # Text callback: labels found in <dt> arm a field flag, the following
    # <dd> / <p> / <a> text is stored into $self->{curInfo}.
    sub text
    {
        my ($self, $origtext) = @_;

        return if ($origtext eq " ");

        return if ($self->{parsingEnded});

        if ($self->{parsingList})    # results page
        {
            if ($self->{isDebut})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                $self->{isDebut} = 0;
                $self->{isFin}   = 1;
            }
        }
        else    # item page
        {

            if ($self->{tagDTdetected} == 1)
            {
                #Title
                #<dt><label>Titre <img src="/design/img/flags/112.png"></label></dt><dd>銃夢 Last Order </dd><dt><label>Titre <img src="/design/img/flags/77.png"></label></dt><dd>Gunnm Last Order</dd>
                if ($origtext =~ m/^Titre/)
                {
                    $self->{titleDetected} = 1;
                }
                #Volume
                #<dt><label>Volume:</label></dt>\n<dd>1/23</dd>
                elsif ($origtext =~ m/^Volume/)
                {
                    $self->{volumeDetected} = 1;
                }
                #Publisher
                #<dt><label>Editeur:</label></dt>\n<dd><a href="http://www.manga-sanctuary.com/bdd/editeurs/6-glenat.html" title="Glénat">Glénat</a></dd>
                elsif ($origtext =~ m/^Editeur/)
                {
                    $self->{publisherDetected} = 1;
                }
                #collection
                #<dt><label>Label:</label></dt>\n<dd>Kana Shonen</dd>
                elsif ($origtext =~ m/^Label/)
                {
                    $self->{collectionDetected} = 1;
                }
                #PublishDate
                #<dt><label>Date de sortie:</label></dt>\n<dd>31/10/2002</dd>
                elsif ($origtext =~ m/^Date de sortie/)
                {
                    $self->{publishdateDetected} = 1;
                }
                #cost
                #<dt><label>Prix:</label></dt>\n<dd>6.5 EUR</dd>
                elsif ($origtext =~ m/^Prix/)
                {
                    $self->{costDetected} = 1;
                }
                #type
                #<dt><label>Type:</label></dt>\n<dd>Manga</dd>
                elsif ($origtext =~ m/^Type/)
                {
                    $self->{typeDetected} = 1;
                }
                #category
                #<dt><label>Catégorie:</label></dt>\n<dd>Seinen</dd>
                elsif ($origtext =~ m/^Catégorie/)
                {
                    $self->{categoryDetected} = 1;
                }
                #Genres [NOTE: no access to tags, so they go into the synopsis]
                #<dt><label>Genres:</label></dt>\n<dd>Action, SF</dd>
                elsif ($origtext =~ m/^Genres/)
                {
                    $self->{genresDetected} = 1;
                }
                #writer [from the series page]
                #<dt><label>Scénariste</label></dt>
                elsif ($origtext =~ m/^Scénariste/)
                {
                    $self->{scenaristeDetected} = 1;
                }
                #illustrator [from the series page]
                #<dt><label>Dessinateur</label></dt>
                elsif ($origtext =~ m/^Dessinateur/)
                {
                    $self->{dessinateurDetected} = 1;
                }
            }

            if ($self->{tagDDdetected} == 1)
            {
                if ($self->{titleDetected} == 1)
                {
                    my $title = _trimmed($origtext);
                    if ($self->{titreFrancais} == 1)
                    {
                        #$self->{curInfo}->{title} = $title; #Title disabled because it is the same as the series
                        $self->{curInfo}->{series} = $title;
                    }
                    else
                    {
                        $self->{curInfo}->{synopsis} .= "Titre original :" . $title . "\n";
                    }
                    $self->{titleDetected} = 0;
                }
                elsif ($self->{volumeDetected} == 1)
                {
                    # Text looks like "1/23": keep the part before the slash
                    if ($origtext =~ m/^(\d*)\//)
                    {
                        $self->{curInfo}->{volume} = $1;
                    }
                    $self->{volumeDetected} = 0;
                }
                elsif ($self->{publisherDetected} == 1)
                {
                    $self->{curInfo}->{publisher} = $origtext;
                    $self->{publisherDetected} = 0;
                }
                elsif ($self->{collectionDetected} == 1)
                {
                    $self->{curInfo}->{collection} = $origtext;
                    $self->{collectionDetected} = 0;
                }
                elsif ($self->{publishdateDetected} == 1)
                {
                    $self->{curInfo}->{publishdate} = $origtext;
                    $self->{publishdateDetected} = 0;
                }
                elsif ($self->{costDetected} == 1)
                {
                    # Accepts "6.5 EUR" as well as integer prices ("6 EUR")
                    if ($origtext =~ m/^\s*(\d+(?:\.\d*)?|\.\d+)/)
                    {
                        $self->{curInfo}->{cost} = $1;
                    }
                    $self->{costDetected} = 0;
                }
                elsif ($self->{typeDetected} == 1)
                {
                    $self->{curInfo}->{type} = $origtext;
                    $self->{typeDetected} = 0;
                }
                elsif ($self->{categoryDetected} == 1)
                {
                    $self->{curInfo}->{category} = $origtext;
                    $self->{categoryDetected} = 0;
                }
                elsif ($self->{genresDetected} == 1)
                {
                    $self->{curInfo}->{synopsis} .= "Genres : " . _trimmed($origtext) . "\n\n";
                    $self->{genresDetected} = 0;
                }
            }
            if ($self->{tagH3detected} == 1)
            {
                #Synopsis detection
                # <h3><span>Synopsis</span></h3>
                if ($origtext =~ m/^Synopsis/)
                {
                    $self->{synopsisDetected} = 1;
                    $self->{curInfo}->{synopsis} .= "Synopsis :\n";
                }
                #Staff reviews detection
                #<h3>Critiques du staff</h3>
                elsif ($origtext =~ m/^Critiques du staff/)
                {
                    $self->{critiquesDetected} = 1;
                    $self->{curInfo}->{synopsis} .= "\n\nCritiques du staff :\n";
                }
                #Reactions disabled because not very interesting
                #                #Reactions detection
                #                #<h3>Réactions</h3>
                #                elsif ($origtext =~ m/^Réactions/)
                #                {
                #                    $self->{reactionsDetected} =1;
                #                    $self->{curInfo}->{synopsis} .= "\n\nRéactions :\n";
                #                }
            }
            if ($self->{tagPdetected} == 1)
            {
                if ($self->{synopsisDetected} == 1)
                {
                    $self->{curInfo}->{synopsis} .= _trimmed($origtext) . "\n";
                    $self->{genresDetected} = 0;
                }
                elsif ($self->{critiquesDetected} == 1)
                {
                    $self->{curInfo}->{synopsis} .= _trimmed($origtext) . "\n";
                    $self->{genresDetected} = 0;
                }
                #Reactions disabled because not very interesting
                #                elsif ($self->{reactionsDetected} == 1)
                #                {
                #                    $origtext =~ m/^\s*(.*?)\s*$/;
                #                    $self->{curInfo}->{synopsis} .= $1."\n";
                #                    $self->{genresDetected} =0;
                #                }
            }
            if ($self->{tagAdetected} == 1)
            {
                if ($self->{scenaristeDetected} == 1)
                {
                    $self->{curInfo}->{writer} = $origtext;
                    $self->{scenaristeDetected} = 0;
                }
                elsif ($self->{dessinateurDetected} == 1)
                {
                    $self->{curInfo}->{illustrator} = $origtext;
                    $self->{dessinateurDetected} = 0;
                }
            }
        }
    }

    # Constructor: declares the result-list columns and resets every
    # detection flag used by start/end/text.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);
#for the search:
#        $self->{hasField} = {
#            series => 1,
#            title  => 1,
#            volume => 1,
#        };
        $self->{hasField} = {
            title  => 1,
            type   => 1,
            format => 1,
        };

        $self->{itemIdx}             = 0;
        $self->{downloadThumbnail}   = 0;
        $self->{tagDTdetected}       = 0;
        $self->{tagDDdetected}       = 0;
        $self->{tagH3detected}       = 0;
        $self->{tagPdetected}        = 0;
        $self->{titleDetected}       = 0;
        $self->{titreFrancais}       = 1;    #defaults to French
        $self->{publisherDetected}   = 0;
        $self->{collectionDetected}  = 0;
        $self->{publishdateDetected} = 0;
        $self->{costDetected}        = 0;
        $self->{typeDetected}        = 0;
        $self->{categoryDetected}    = 0;
        $self->{genresDetected}      = 0;
        $self->{synopsisDetected}    = 0;
        $self->{critiquesDetected}   = 0;
        $self->{reactionsDetected}   = 0;
        $self->{scenaristeDetected}  = 0;
        $self->{dessinateurDetected} = 0;
        $self->{notationDetected}    = 0;

        return $self;
    }

    # Reduces the fetched page to the part worth parsing and returns it.
    # Returns an empty string when the expected section is not found, so
    # that unrelated markup is never parsed as results.
    sub preProcess
    {
        my ($self, $html) = @_;

        if ($self->{parsingList})    # results page
        {
            #keep only Volumes
            if ($html =~ m/<h3>Volumes\s\(\d+\)<\/h3>\s*(.*?)\s*<h3>Critiques/s)
            {
                $html = $1;
            }
            else
            {
                $html = '';
            }
        }
        else    # item page
        {
            if ($html =~ m/<div id="contenu">\s*(<ul id="menu_fiche">\s*<li><a href="(http:\/\/www.manga-sanctuary.com.*?)">.*?)\s*<h3><span>Mes actions<\/span><\/h3>/s)
            {
                my ($sheet, $seriesUrl) = ($1, $2);
                $html = $sheet;

                #fetch the information of the series page
                my $response = $ua->get($seriesUrl);
                if ($response->content =~ m/<h3><span>Staff<\/span><\/h3>\s*(.*?<\/dl>)/s)
                {
                    $html .= "\n\n <fiche série>\n\n" . $1;
                }
            }
            else
            {
                $html = '';
            }
        }

        return $html;
    }

    # Returns the search URL and the POST parameters for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;
        $word =~ s/\+/ /g;
        return ('http://www.manga-sanctuary.com/recherche/tout/', ['keywords' => $word]);

    }

    # Returns $url unchanged when it is already absolute (http or https),
    # otherwise prefixes it with the site host.
    sub getItemUrl
    {
        my ($self, $url) = @_;
        #Betting that this part is not used
        #        my @array = split( /#/, $url );
        #        $self->{site_internal_id} = $array[1];

        return $url if $url =~ /^https?:/;
        return "http://www.manga-sanctuary.com" . $url;
    }

    # Number of list passes needed before reaching an item page.
    sub getNumberPasses
    {
        return 1;
    }

    # Display name of the plugin.
    sub getName
    {
        return "Manga-Sanctuary";
    }

    # Plugin author.
    sub getAuthor
    {
        return 'Biggriffon';
    }

    # Language of the site contents.
    sub getLang
    {
        return 'FR';
    }
}

1;
# diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAlapage.pm b/lib/gcstar/GCPlugins/GCfilms/GCAlapage.pm
# new file mode 100644
# index 0000000..604fdc4
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCfilms/GCAlapage.pm
# @@ -0,0 +1,267 @@
package GCPlugins::GCfilms::GCAlapage;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Scraper for alapage.com DVD pages. preProcess() rewrites the site
    # markup into simpler tags that start()/text() then recognise.
    package GCPlugins::GCfilms::GCPluginAlapage;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Opening-tag callback.
    #   $tagname - lower-case tag name
    #   $attr    - hash ref of the tag's attributes
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                if ($attr->{class} eq "tx12noirbold")
                {
                    my $url = $attr->{href};
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
            }
            elsif ($tagname eq "div")
            {
                # These two div classes are generated by preProcess()
                if ($attr->{class} eq "acteurs")
                {
                    $self->{isActors} = 1;
                }
                elsif ($attr->{class} eq "realisateur")
                {
                    $self->{isDirector} = 1;
                }
            }
        }
        else
        {
            if ($tagname eq "img")
            {
                if ($attr->{src} =~ /^\/resize\.php\?ref=([0-9]*)/)
                {
                    $self->{curInfo}->{image} =
                      "http://imgdata.echo.fr/disque_l?v$1r.jpg";
                }
            }
            elsif ($tagname eq "span")
            {
                $self->{insideName}   = 1 if $attr->{style} eq "color:#414B55;";
                $self->{insideActors} = 1 if $attr->{class} eq "tx11gris";
            }
            elsif ($tagname eq "div")
            {
                $self->{insideSynopsis} = 1 if $attr->{align} eq "justify";
            }
        }
    }

    # Closing-tag callback: only maintains the per-tag nesting counter.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;

    }

    # Text callback: stores $origtext into the list entry or into
    # $self->{curInfo}, according to the flags set by start() or to the
    # label found in the text itself.
    sub text
    {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext;
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
                return;
            }
            elsif ($self->{isActors})
            {
                # Comma-separated accumulation of the actors of the entry
                $self->{itemsList}[ $self->{itemIdx} ]->{"actors"} .=
                  $self->{itemsList}[ $self->{itemIdx} ]->{"actors"}
                  ? ', ' . $self->capWord($origtext)
                  : $self->capWord($origtext);
                $self->{isActors} = 0;
            }
            elsif ($self->{isDirector})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{"director"} =
                  $self->capWord($origtext);
                $self->{isDirector} = 0;
            }

        }
        else
        {
            $origtext =~ s/\s{2,}//g;

            if ($self->{insideName})
            {
                $self->{curInfo}->{title} = $self->capWord($origtext);
                $self->{insideName} = 0;
            }
            elsif ($self->{insideActors})
            {
                $origtext =~ s/avec : (.*) - (?:[^-]* )?DVD/$1/;
                $origtext =~ s/ - /, /g;
                $self->{curInfo}->{actors} = $self->capWord($origtext)
                  if !$self->{curInfo}->{actors};
                $self->{insideActors} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                # [br] markers were put in place of <br> by preProcess()
                $origtext =~ s/\[br\]/\n/g;
                $self->{curInfo}->{synopsis} .= $origtext;
                $self->{insideSynopsis} = 0;
            }
            elsif ($origtext =~ m/R.*alisateur :/)
            {
                $origtext =~ s/R.*alisateur(?: :)?(.*)/$1/;
                $origtext =~ s/ - /, /g;
                $self->{curInfo}->{director} = $self->capWord($origtext)
                  if !$self->{curInfo}->{director};
            }
            elsif ($origtext =~ m/Genre :/)
            {
                $origtext =~ s/Genre :(.*)/$1/;
                $origtext = $self->capWord($origtext);
                $origtext =~ s/ \/ /,/g;
                $origtext =~ s/,Video//g;
                $self->{curInfo}->{genre} = $origtext if !$self->{curInfo}->{genre};
            }
            elsif ($origtext =~ m/Année du film :/)
            {
                $origtext =~ s/Année du film :(.*)/$1/;
                $self->{curInfo}->{date} = $origtext if !$self->{curInfo}->{date};
            }
            elsif ($origtext =~ m/Durée du film/)
            {
                $origtext =~ s/Durée du film(.*)/$1/;
                $self->{curInfo}->{time} = $origtext if !$self->{curInfo}->{time};
            }
        }
    }

    # Constructor: declares the result-list columns and resets the list
    # parsing flags.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 0,
            director => 1,
            actors   => 1,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # Normalises the fetched page before parsing and returns it: fixes a few
    # characters, drops inline formatting tags and rewrites the director /
    # actor links into <div class="realisateur"> / <div class="acteurs">.
    sub preProcess
    {
        my ($self, $html) = @_;

        #Fix for character-encoding:
        # NOTE(review): the two source characters of these substitutions were
        # lost in this copy of the file (the first pattern was left empty,
        # which Perl treats as "reuse the last successful pattern"). 0x92
        # (Windows-1252 right single quote) and 0x85 (ellipsis) are inferred
        # from the replacement texts and the ISO-8859-1 charset declared by
        # getCharset -- confirm against the upstream file.
        $html =~ s/\x{92}/'/g;
        $html =~ s/\x{85}/.../g;

#<<< keep perltidy away from these lines
        $html =~ s/<br>/\[br\]/gi;
        # NOTE(review): the pattern appeared as a plain space here (a no-op);
        # presumably it was the &nbsp; entity before HTML rendering -- confirm.
        $html =~ s/&nbsp;/ /g;
        $html =~ s/<u>|<\/u>|<b>|<\/b>|<i>|<\/i>//gi;
        $html =~ s/<SPAN class="(?:tx12gris6|tx12noir)">([^<]*)<\/SPAN>/$1/gi;
        $html =~ s|<A href="/-/Liste/DVD/mot_real_nomprenom=.*?\?id=[0-9]*&donnee_appel=ALAPAGE" class="roll">([^<]*)</A>|<div class="realisateur">$1</div>|gi;
        $html =~ s|<A href="/-/Liste/DVD/mot_art_nomprenom=.*?\?id=[0-9]*&donnee_appel=ALAPAGE" class="roll">([^<]*)</A>|<div class="acteurs">$1</div>|gi;
        $html =~ s/<A href="http\:\/\/www\.alapage\.com\/-\/Liste\/DVD\/mot_(?:art_nomprenom|real_nomprenom|gen_libelle)=[^\/]*\/\?id=[0-9]*&donnee_appel=ALAPAGE[^"]*?" class="roll">([^<]*)<\/A>/$1/gi;
        $html =~ s|<A .*?mot_gen_libelle=.*?>(.*?)</A>|$1|gi;
        $html =~ s/<TD valign="top" class="tx12noir[^"]*">([^<]*)<\/TD>[^<]*<TD>([^<]*)<\/TD>/<td>$1 $2<\/td>/gi;
        $html =~ s/<td class="tx12grisbold" align="center" bgcolor="\#E6E6E8">([^<]*)<\/td>[^<]*<TD width="2"><IMG src="\/turbo\/templates\/img\/pix\.gif" width="2" height="25" border="0" alt=""><\/TD>[^<]*<td class="tx10noir" align="center" bgcolor="\#F4F4F6" colspan="3">([0-9]* mn)<\/td>/<td>$1 $2<\/td>/gi;
#>>>
        return $html;
    }

    # Returns the search URL for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://alapage.com/mx/?type=41&tp=L&fulltext=" . $word;
    }

    # Returns the absolute URL of an item from the site-relative $url.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return "http://alapage.com" . $url;
    }

    # Display name of the plugin.
    sub getName
    {
        return "Alapage.com";
    }

    # Plugin author.
    sub getAuthor
    {
        return 'MeV';
    }

    # Language of the site contents.
    sub getLang
    {
        return 'FR';
    }

    # Character set the site pages are served in.
    sub getCharset
    {
        my $self = shift;

        return "ISO-8859-1";
    }

    # File suffix used for downloaded pictures.
    sub getDefaultPictureSuffix
    {
        return '.jpg';
    }
}

1;
# diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAllmovie.pm b/lib/gcstar/GCPlugins/GCfilms/GCAllmovie.pm
# new file mode 100644
# index 0000000..e8117c5
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCfilms/GCAllmovie.pm
# @@ -0,0 +1,431 @@
package GCPlugins::GCfilms::GCAllmovie;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+
+    # Film plugin for allmovie.com. The parser callbacks (start, end, text)
+    # communicate through flags stored in $self:
+    #   - when $self->{parsingList} is true, a search-result page is parsed
+    #     and entries are appended to $self->{itemsList};
+    #   - otherwise an item page is parsed and $self->{curInfo} is filled.
+    # Once $self->{parsingEnded} is set the callbacks ignore further input.
+    package GCPlugins::GCfilms::GCPluginAllmovie;
+
+    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+    # start
+    # Callback for every opening tag.
+    # $tagname is the tag name, $attr a hash reference of its attributes.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingEnded})
+        {
+            return;
+        }
+
+        if ($self->{parsingList})
+        {
+            # The first link after a "film" cell opens a new result item.
+            if (($tagname eq "a") && ($self->{isFilm}))
+            {
+                my $url = $attr->{href};
+                $self->{isMovie} = 1;
+                $self->{isInfo}  = 1;
+                $self->{itemIdx}++;
+                $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
+                $self->{isFilm} = 0;
+            }
+            if ($tagname eq "td")
+            {
+                # Result columns are told apart by their width in the style.
+                if ($attr->{style} =~ m/284px/)
+                {
+                    $self->{isFilm} = 1;
+                }
+                elsif ($attr->{style} =~ m/70px/)
+                {
+                    $self->{isYear} = 1;
+                }
+                elsif ($attr->{style} =~ m/190px/)
+                {
+                    $self->{isDirector} = 1;
+                }
+            }
+            elsif ($tagname eq "tr")
+            {
+                $self->{isFound} = 1;
+            }
+            elsif ($tagname eq "title")
+            {
+                $self->{insideHTMLtitle} = 1;
+            }
+        }
+        else
+        {
+            if (($tagname eq "span") && ($attr->{class} eq "title"))
+            {
+                $self->{insideTitle} = 1;
+            }
+            elsif (
+                ($tagname eq "div")
+                && (   ($attr->{id} eq "left-sidebar-title")
+                    || ($attr->{id} eq "left-sidebar-title-small"))
+              )
+            {
+                $self->{insideLeftSidebarTitle} = 1;
+            }
+            elsif ($tagname eq "a")
+            {
+                # The meaning of a link depends on the section announced by
+                # the last sidebar title seen by the text callback.
+                if ($attr->{href} =~ m/sql=B/)
+                {
+                    $self->{insideActors} = 1;
+                }
+                elsif ($self->{insideDirectorList})
+                {
+                    $self->{insideDirector} = 1;
+                }
+                elsif ($self->{insideYearRuntime})
+                {
+                    $self->{insideYear}        = 1;
+                    $self->{insideYearRuntime} = 0;
+                }
+                elsif ($self->{insideCountriesRating})
+                {
+                    $self->{insideCountry}         = 1;
+                    $self->{insideCountriesRating} = 0;
+                }
+                elsif ($self->{nextIsSeries})
+                {
+                    $self->{insideSeries} = 1;
+                    $self->{nextIsSeries} = 0;
+                }
+            }
+            elsif ($tagname eq "img")
+            {
+                if ($attr->{src} =~ /http\:\/\/image\.allmusic\.com/)
+                {
+                    $self->{curInfo}->{image} = ($attr->{src});
+                }
+                elsif ($self->{insideRatingStars})
+                {
+                    # The star count is doubled before being stored. The
+                    # capture is only used when the match succeeded, so a
+                    # stale $1 from an earlier regex cannot leak in.
+                    if (defined($attr->{title})
+                        && ($attr->{title} =~ /([\d\.]+) Stars/))
+                    {
+                        $self->{curInfo}->{ratingpress} = $1 * 2;
+                    }
+                    $self->{insideRatingStars} = 0;
+                }
+            }
+            elsif ($tagname eq "li")
+            {
+                if ($self->{insideGenreList})
+                {
+                    $self->{insideGenre} = 1;
+                }
+            }
+            elsif ($tagname eq "td")
+            {
+                if (   ($self->{insideAKA})
+                    && ($attr->{class} =~ m/formed-sub/))
+                {
+                    $self->{insideOtherTitles} = 1;
+                }
+                elsif ($self->{nextIsRating})
+                {
+                    $self->{insideRating} = 1;
+                    $self->{nextIsRating} = 0;
+                }
+                elsif ($self->{nextIsRuntime})
+                {
+                    $self->{insideTime}    = 1;
+                    $self->{nextIsRuntime} = 0;
+                }
+                elsif ($attr->{colspan} == 2)
+                {
+                    if ($attr->{class} eq "large-list-title")
+                    {
+                    }
+                    else
+                    {
+                        $self->{insideSynopsis} = 1;
+                    }
+                }
+                elsif ($attr->{class} eq "rating-stars")
+                {
+                    $self->{insideRatingStars} = 1;
+                }
+            }
+        }
+    }
+
+    # end
+    # Callback for every closing tag. Closes the sections that are bounded
+    # by a cell, a div or a table.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        $self->{inside}->{$tagname}--;
+
+        if ($tagname eq "td")
+        {
+            $self->{insideSynopsis} = 0;
+        }
+        if ($tagname eq "div")
+        {
+            $self->{insideLeftSidebarTitle} = 0;
+        }
+        if ($tagname eq "table")
+        {
+            $self->{insideGenreList} = 0;
+            $self->{insideAKA}       = 0;
+            # Drop the separator left after the last alternative title.
+            $self->{curInfo}->{original} =~ s/(, )$//;
+        }
+    }
+
+    # text
+    # Callback for every text node. Stores $origtext into the field selected
+    # by the flags raised in start. Empty and single-space nodes are ignored.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+        return if ((length($origtext) == 0) || ($origtext eq " "));
+
+        # Resolve the entities we care about, then drop every other numeric
+        # entity. "Superscript three" is accepted in entity or raw form.
+        $origtext =~ s/&quot;/"/g;
+        $origtext =~ s/(?:&#179;|&sup3;|\x{B3})/3/g;
+        $origtext =~ s/&#[0-9]*;//g;
+        $origtext =~ s/\n//g;
+
+        return if ($self->{parsingEnded});
+
+        if ($self->{parsingList})
+        {
+            if (($self->{insideHTMLtitle}))
+            {
+                # A page title without "Results" means we are not on a result
+                # list: the loaded page itself becomes the single item.
+                if ($origtext !~ m/Results/)
+                {
+                    $self->{parsingEnded}        = 1;
+                    $self->{itemIdx}             = 0;
+                    $self->{itemsList}[0]->{url} = $self->{loadedUrl};
+                }
+                $self->{insideHTMLtitle} = 0;
+            }
+            if ($self->{isMovie})
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
+                $self->{isMovie}                                = 0;
+                $self->{isInfo}                                 = 1;
+                return;
+            }
+            if ($self->{isYear})
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $origtext
+                  if $origtext =~ m/^[0-9]{4}?/;
+                $self->{isYear} = 0;
+            }
+            if ($self->{isDirector})
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext;
+                $self->{isDirector} = 0;
+            }
+            if ($self->{isInfo})
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1
+                  if $origtext =~ m|\(([0-9]*)(/I+)?\)|;
+                $self->{isInfo} = 0;
+            }
+        }
+        else
+        {
+            # Sidebar titles announce which section the next tags belong to.
+            if ($self->{insideLeftSidebarTitle})
+            {
+                if ($origtext eq "Genres")
+                {
+                    $self->{insideGenreList}        = 1;
+                    $self->{insideLeftSidebarTitle} = 0;
+                }
+                elsif ($origtext eq "Director")
+                {
+                    $self->{insideDirectorList}     = 1;
+                    $self->{insideLeftSidebarTitle} = 0;
+                }
+                elsif ($origtext eq "Year")
+                {
+                    $self->{insideYearRuntime}      = 1;
+                    $self->{insideLeftSidebarTitle} = 0;
+                }
+                elsif ($origtext eq "Countries")
+                {
+                    $self->{insideCountriesRating}  = 1;
+                    $self->{insideLeftSidebarTitle} = 0;
+                }
+                elsif ($origtext eq "AKA")
+                {
+                    $self->{insideAKA}              = 1;
+                    $self->{insideLeftSidebarTitle} = 0;
+                }
+            }
+            elsif ($origtext =~ /Is part of the series:$/)
+            {
+                $self->{nextIsSeries} = 1;
+            }
+            if ($self->{insideActors})
+            {
+                # Actors beyond MAX_ACTORS are counted but not stored.
+                $self->{curInfo}->{actors} .= $origtext . ', '
+                  if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
+                $self->{actorsCounter}++;
+                $self->{insideActors} = 0;
+            }
+
+            if ($self->{insideYear})
+            {
+                $self->{curInfo}->{date} = $origtext;
+                $self->{insideYear}      = 0;
+                $self->{nextIsRuntime}   = 1;
+            }
+            if ($self->{insideTitle})
+            {
+                $self->{curInfo}->{title} = $origtext;
+                $self->{insideTitle} = 0;
+            }
+            elsif ($self->{insideGenre})
+            {
+                $self->{curInfo}->{genre} .= $self->capWord($origtext) . ',';
+                $self->{insideGenre} = 0;
+            }
+            elsif ($self->{insideDirector})
+            {
+                $self->{curInfo}->{director} = $origtext;
+                $self->{insideDirector}      = 0;
+                $self->{insideDirectorList}  = 0;
+            }
+            elsif ($self->{insideSynopsis})
+            {
+                # Stays open until the enclosing cell is closed (see end).
+                $self->{curInfo}->{synopsis} .= $origtext;
+            }
+            elsif ($self->{insideCountry})
+            {
+                $self->{curInfo}->{country} = $origtext;
+                $self->{insideCountry}      = 0;
+                $self->{nextIsRating}       = 1;
+            }
+            elsif ($self->{insideTime})
+            {
+                $self->{curInfo}->{time} = $origtext;
+                $self->{curInfo}->{time} =~ s/.[0-9]*?://;
+                $self->{insideTime} = 0;
+            }
+            elsif ($self->{insideRating})
+            {
+                # Map the rating label to a minimum age.
+                $self->{curInfo}->{age} = 1
+                  if ($origtext eq 'Unrated') || ($origtext eq 'Open');
+                $self->{curInfo}->{age} = 2
+                  if ($origtext eq 'G') || ($origtext eq 'Approved');
+                $self->{curInfo}->{age} = 5
+                  if ($origtext eq 'PG') || ($origtext eq 'M') || ($origtext eq 'GP');
+                $self->{curInfo}->{age} = 13 if $origtext eq 'PG13';
+                $self->{curInfo}->{age} = 17 if $origtext eq 'R';
+                $self->{curInfo}->{age} = 18
+                  if ($origtext eq 'NC17') || ($origtext eq 'X');
+                $self->{insideRating} = 0;
+            }
+            elsif ($self->{insideOtherTitles})
+            {
+                $self->{tempOriginal} = $origtext;
+                $self->{tempOriginal} =~ s/\s*$//;
+                $self->{tempOriginal} =~ s/^\s*//;
+
+                # The trailing separator is removed by end on </table>.
+                $self->{curInfo}->{original} .= $self->{tempOriginal} . ', ';
+                $self->{insideOtherTitles} = 0;
+            }
+            elsif ($self->{insideSeries})
+            {
+                $self->{curInfo}->{serie} = $origtext;
+                $self->{curInfo}->{serie} =~ s/( \[.*\])//;
+                $self->{insideSeries} = 0;
+            }
+        }
+    }
+
+    # Constructor
+    # Builds the object through the parent constructor, declares which
+    # columns the search results provide and resets the parsing flags.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless($self, $class);
+
+        $self->{hasField} = {
+            title    => 1,
+            date     => 1,
+            director => 1,
+            actors   => 0,
+        };
+
+        $self->{isInfo}  = 0;
+        $self->{isMovie} = 0;
+        $self->{curName} = undef;
+        $self->{curUrl}  = undef;
+
+        return $self;
+    }
+
+    # preProcess
+    # Rewrites the page content before it is parsed and re-arms parsing.
+    # $html is the page content. Returns the modified content.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        $self->{parsingEnded} = 0;
+
+        # A &quot; entity next to a real double quote is turned into a single
+        # quote, on either side of the real quote.
+        $html =~ s/"&quot;/'"/g;
+        $html =~ s/&quot;"/"'/g;
+        $html =~ s|</a></b><br>|</a><br>|;
+
+        return $html;
+    }
+
+    # getSearchUrl
+    # $word is the query. Returns the URL of the search page.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        my $wordFiltered = $word;
+
+        # Allmovie doesn't return correct results if searching with a prefix like 'the'
+        $wordFiltered =~ s/^(the|a)?[+\s]+[^ a-zA-Z0-9]*\s*//i;
+#     return ('http://allmovie.com/search/all', ['q' => $wordFiltered,'submit' => 'SEARCH']);
+        return ('http://allmovie.com/search/all/' . $wordFiltered);
+
+    }
+
+    # getItemUrl
+    # $url is the URL found in the results. It is returned unchanged when it
+    # already starts with "http:", otherwise the site address is prepended.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+        return $url if $url =~ /^http:/;
+        return "http://allmovie.com" . $url;
+    }
+
+    # Returns the plugin name.
+    sub getName
+    {
+        return "Allmovie";
+    }
+
+    # Returns the plugin author name.
+    sub getAuthor
+    {
+        return 'Zombiepig';
+    }
+
+    # Returns the language code of the site.
+    sub getLang
+    {
+        return 'EN';
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm b/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm
new file mode 100644
index 0000000..db37774
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm
@@ -0,0 +1,403 @@
+package GCPlugins::GCfilms::GCAllocine;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+
+    # Film plugin for allocine.fr. The parser callbacks (start, end, text)
+    # communicate through small step counters stored in $self (for example
+    # isMovie goes 0 -> 1 -> 2 -> 3 -> 4 while a result row is read):
+    #   - when $self->{parsingList} is true, a search-result page is parsed
+    #     and entries are appended to $self->{itemsList};
+    #   - otherwise an item page is parsed and $self->{curInfo} is filled.
+    package GCPlugins::GCfilms::GCPluginAllocine;
+
+    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+    # start
+    # Callback for every opening tag.
+    # $tagname is the tag name, $attr a hash reference of its attributes.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            # insideResults is raised by text once the result count is seen.
+            if ($self->{insideResults} eq 1)
+            {
+                if (   ($tagname eq "a")
+                    && ($attr->{href} =~ /^\/film\/fichefilm_gen_cfilm=/)
+                    && ($self->{isMovie} eq 0))
+                {
+                    my $url = $attr->{href};
+                    $self->{isMovie} = 1;
+                    $self->{isInfo}  = 0;
+                    $self->{itemIdx}++;
+                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
+                }
+                elsif (($tagname eq "td") && ($self->{isMovie} eq 1))
+                {
+                    $self->{isMovie} = 2;
+                }
+                elsif (($tagname eq "a") && ($self->{isMovie} eq 2))
+                {
+                    # Step 3: text collects the title until the next <br>.
+                    $self->{isMovie} = 3;
+                }
+                elsif (($tagname eq "br") && ($self->{isMovie} eq 3))
+                {
+                    # Title complete: trim it and collapse inner whitespace.
+                    $self->{itemsList}[ $self->{itemIdx} ]->{title} =~ s/^\s*//;
+                    $self->{itemsList}[ $self->{itemIdx} ]->{title} =~ s/\s*$//;
+                    $self->{itemsList}[ $self->{itemIdx} ]->{title} =~ s/\s+/ /g;
+                    $self->{isMovie} = 4;
+                }
+                elsif (($tagname eq "span")
+                    && ($attr->{class} eq "fs11")
+                    && ($self->{isMovie} eq 4))
+                {
+                    $self->{isInfo}  = 1;
+                    $self->{isMovie} = 0;
+                }
+                # Each <br> moves to the next info line:
+                # 1 = year, 2 = director, 3 = actors (see text).
+                elsif (($tagname eq "br") && ($self->{isInfo} eq 1))
+                {
+                    $self->{isInfo} = 2;
+                }
+                elsif (($tagname eq "br") && ($self->{isInfo} eq 2))
+                {
+                    $self->{isInfo} = 3;
+                }
+            }
+        }
+        else
+        {
+            if (($tagname eq "div") && ($attr->{class} eq "poster"))
+            {
+                $self->{insidePicture} = 1;
+            }
+            elsif (($tagname eq "img") && ($self->{insidePicture} eq 1))
+            {
+                my $src = $attr->{src};
+                if (!$self->{curInfo}->{image})
+                {
+                    # Only the 160x240 rendition is accepted as poster.
+                    if ($src =~ /r_160_240/)
+                    {
+                        $self->{curInfo}->{image} = $src;
+                    }
+                    else
+                    {
+                        $self->{curInfo}->{image} = "empty";
+                    }
+                }
+            }
+            elsif ($tagname eq "h1")
+            {
+                $self->{insideTitle} = 1;
+            }
+            # For the fields below, value 1 is set by text when the label is
+            # seen; the next matching tag switches to 2 (collecting).
+            elsif (($tagname eq "span") && ($self->{insideDate} eq 1))
+            {
+                $self->{insideDate} = 2;
+            }
+            elsif (($tagname eq "span") && ($attr->{itemprop} eq "duration"))
+            {
+                $self->{insideTime} = 1;
+            }
+            elsif (($tagname eq "span") && ($self->{insideDirector} eq 1))
+            {
+                $self->{insideDirector} = 2;
+            }
+            elsif (($tagname eq "a") && ($self->{insideActor} eq 1))
+            {
+                $self->{insideActor} = 2;
+            }
+            elsif (($tagname eq "span") && ($self->{insideGenre} eq 1))
+            {
+                $self->{insideGenre} = 2;
+            }
+            elsif (($tagname eq "span") && ($self->{insideCountry} eq 1))
+            {
+                $self->{insideCountry} = 2;
+            }
+            elsif (($tagname eq "span") && ($attr->{class} eq "note") && ($self->{insidePressRating} eq 1))
+            {
+                $self->{insidePressRating} = 2;
+            }
+            elsif (($tagname eq "div") && ($attr->{class} eq "breaker"))
+            {
+                $self->{insidePressRating} = 0;
+            }
+            elsif (($tagname eq "p") && ($attr->{itemprop} eq "description"))
+            {
+                $self->{insideSynopsis} = 1;
+            }
+            elsif (($tagname eq "td") && ($self->{insideOriginal} eq 1))
+            {
+                $self->{insideOriginal} = 2;
+            }
+
+        }
+    }
+
+    # end
+    # Callback for every closing tag. Closes the sections that are bounded
+    # by a list item, a div, a table header or a table.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+        $self->{inside}->{$tagname}--;
+
+        if ($tagname eq "li")
+        {
+            $self->{insideDirector} = 0;
+            $self->{insideActor}    = 0;
+            $self->{insideGenre}    = 0;
+        }
+        elsif ($tagname eq "div")
+        {
+            $self->{insideCountry}  = 0;
+            $self->{insideSynopsis} = 0;
+            $self->{insideActor}    = 0;
+        }
+        elsif ($tagname eq "th")
+        {
+            $self->{insideSynopsis} = 0;
+        }
+        elsif ($tagname eq "table")
+        {
+            $self->{insideResults} = 0;
+        }
+
+    }
+
+    # text
+    # Callback for every text node. Either stores $origtext into the field
+    # selected by the current step counters, or recognises a label and arms
+    # the matching counter for the following tags.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            # Results are only read after a non-zero film count is announced.
+            if (($origtext =~ m/(\d+) r..?sultats? trouv..?s? dans les titres de films/) && ($1 > 0))
+            {
+                $self->{insideResults} = 1;
+            }
+            if ($self->{isMovie} eq 3)
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{title} .= $origtext;
+            }
+            if ($self->{isInfo} eq 1)
+            {
+                if ($origtext =~ /\s*([0-9]{4})/)
+                {
+                    $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
+                }
+            }
+            elsif ($self->{isInfo} eq 2)
+            {
+                if ($origtext =~ /^\s*de (.*)/)
+                {
+                    $self->{itemsList}[ $self->{itemIdx} ]->{director} = $1;
+                }
+            }
+            elsif ($self->{isInfo} eq 3)
+            {
+                if (   ($origtext =~ m/^\s*avec (.*)/)
+                    && (!$self->{itemsList}[ $self->{itemIdx} ]->{actors}))
+                {
+                    $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $1;
+                }
+                $self->{isInfo} = 0;
+            }
+        }
+        else
+        {
+            # Normalise the node: no line breaks, no surrounding whitespace.
+            $origtext =~ s/[\r\n]//g;
+            $origtext =~ s/^\s*//;
+            $origtext =~ s/\s*$//;
+
+            if ($self->{insideTitle} eq 1)
+            {
+                $self->{curInfo}->{title} = $origtext;
+                $self->{insideTitle} = 0;
+            }
+            elsif (($self->{insideDate} eq 2) && (length($origtext) > 1))
+            {
+                $self->{curInfo}->{date} = $origtext
+                  if !($origtext =~ /inconnu/);
+                $self->{insideDate} = 0;
+            }
+            elsif (($origtext =~ /^Date de sortie/)
+                && (!$self->{curInfo}->{date}))
+            {
+                $self->{insideDate} = 1;
+            }
+            elsif (($origtext =~ /^Date de reprise/)
+                && (!$self->{curInfo}->{date}))
+            {
+                $self->{insideDate} = 1;
+            }
+            elsif ($self->{insideTime} eq 1)
+            {
+                # "1h 45m" style duration, stored in minutes. The captures
+                # are only used when the pattern matched; otherwise the
+                # duration is left unset instead of being computed from
+                # stale or undefined captures.
+                if ($origtext =~ /(\d+)h\s*(\d+)m/)
+                {
+                    my $time = ($1 * 60) + $2;
+                    $self->{curInfo}->{time} = $time . " m.";
+                }
+                $self->{insideTime} = 0;
+            }
+            elsif ($self->{insideDirector} eq 2)
+            {
+                $origtext = ", " if $origtext =~ m/^,/;
+                $self->{curInfo}->{director} .= $origtext;
+            }
+            elsif ($origtext =~ /^R..?alis..? par/)
+            {
+                $self->{insideDirector} = 1;
+            }
+            elsif ($self->{insideActor} eq 2)
+            {
+                # "plus" is the label of the "more" link that ends the list.
+                $origtext =~ s/plus//;
+                $origtext = "," if $origtext =~ m/^,/;
+                $self->{curInfo}->{actors} .= $origtext;
+            }
+            elsif ($origtext =~ /^Avec/)
+            {
+                $self->{insideActor} = 1;
+            }
+            elsif ($self->{insideGenre} eq 2)
+            {
+                $origtext = "," if $origtext =~ m/^,/;
+                $self->{curInfo}->{genre} .= $origtext;
+            }
+            elsif ($origtext =~ /^[\s\n]*Genre/)
+            {
+                $self->{insideGenre} = 1;
+            }
+            elsif ($self->{insideCountry} eq 2)
+            {
+                $origtext = "," if $origtext =~ m/^,/;
+                $self->{curInfo}->{country} .= $origtext;
+            }
+            elsif ($origtext =~ /Nationalité/)
+            {
+                $self->{insideCountry} = 1;
+            }
+            elsif ($origtext =~ /^Presse$/)
+            {
+                $self->{insidePressRating} = 1;
+            }
+            elsif ($self->{insidePressRating} eq 2)
+            {
+                # Decimal comma -> decimal point, value doubled before being
+                # stored. Only the first numeric node is taken: appending
+                # every node would add "0" for empty nodes and glue further
+                # ratings onto the value.
+                $origtext =~ s/,/./;
+                if ($origtext =~ /^\d+(?:\.\d+)?$/)
+                {
+                    $self->{curInfo}->{ratingpress} = $origtext * 2;
+                    $self->{insidePressRating} = 0;
+                }
+            }
+            elsif ($origtext =~ /^Interdit aux moins de (\d+) ans/)
+            {
+                $self->{curInfo}->{age} = $1;
+            }
+            elsif ($self->{insideSynopsis} eq 1)
+            {
+                $self->{curInfo}->{synopsis} .= $origtext;
+            }
+            elsif ($self->{insideOriginal} eq 2)
+            {
+                $self->{curInfo}->{original} = $origtext;
+                $self->{insideOriginal} = 0;
+            }
+            elsif ($origtext =~ /^Titre original/)
+            {
+                $self->{insideOriginal} = 1;
+            }
+
+        }
+    }
+
+    # Constructor
+    # Builds the object through the parent constructor, declares which
+    # columns the search results provide and resets the parsing state.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+
+        $self->{hasField} = {
+            title    => 1,
+            date     => 1,
+            director => 1,
+            actors   => 1,
+        };
+
+        $self->{isInfo}        = 0;
+        $self->{isMovie}       = 0;
+        $self->{insideResults} = 0;
+        $self->{curName}       = undef;
+        $self->{curUrl}        = undef;
+        $self->{actorsCounter} = 0;
+
+        bless($self, $class);
+        return $self;
+    }
+
+    # preProcess
+    # $html is the page content. It is returned unchanged.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        return $html;
+    }
+
+    # getSearchUrl
+    # $word is the query. Returns the URL of the search page.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        # f=3 ?
+        # return "http://www.allocine.fr/recherche/?q=$word&f=3&rub=1";
+        return "http://www.allocine.fr/recherche/1/?q=$word";
+    }
+
+    # getSearchCharset
+    # Returns the charset name used for the search query.
+    sub getSearchCharset
+    {
+        my $self = shift;
+
+        # Need urls to be double character encoded
+        return "utf8";
+    }
+
+    # getItemUrl
+    # $url is the site-relative URL found in the results.
+    # Returns the absolute URL.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return "http://www.allocine.fr" . $url;
+    }
+
+    # Returns the plugin name.
+    sub getName
+    {
+        return "Allocine.fr";
+    }
+
+    # Returns the plugin author name.
+    sub getAuthor
+    {
+        return 'Tian';
+    }
+
+    # Returns the language code of the site.
+    sub getLang
+    {
+        return 'FR';
+    }
+
+    # Returns the charset name used for the site pages.
+    sub getCharset
+    {
+        # return "UTF-8"; # For 1.5.0 Win32
+        return "ISO-8859-1"; # For 1.5.0 Win32 with /lib/gcstar/GCPlugins/ ver.1.5.9svn
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm b/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm
new file mode 100644
index 0000000..75c6854
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm
@@ -0,0 +1,435 @@
+package GCPlugins::GCfilms::GCAlpacineES;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+    package GCPlugins::GCfilms::GCPluginAlpacineES;
+
+    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+
+    # text
+    # Called each time some plain text (between tags) is processed.
+    # $origtext is the read text.
+    # Text nodes shorter than two characters are ignored.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        return if length($origtext) < 2;
+
+        # Processing of the search results page
+        if ($self->{parsingList}){
+            # Store the year. Only the four digits found between parentheses
+            # are kept; text without such a year (for instance the title
+            # itself) no longer overwrites the date.
+            if ($self->{inside}->{li} && $self->{insideInfos}){
+                if ($origtext =~ /\(([0-9]{4})\)/){
+                    $self->{itemsList}[$self->{itemIdx}]->{date} = $1;
+                }
+            }
+            # Store the title
+            if ($self->{inside}->{a} && $self->{insideInfos}){
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+            }
+        }
+
+        else{
+            # Remove leading spaces, runs of spaces and trailing spaces
+            $origtext =~ s/^\s*|\s{2,}|\s*$//g;
+            # We are processing the title
+            if ($self->{insideTitle})
+            {
+                # The heading reads "Title (year)". When no year is present
+                # the whole heading is used as title, instead of storing
+                # captures left over from an earlier match.
+                if ($origtext =~ /(.*) \(([0-9]{4})\)/){
+                    $self->{curInfo}->{title} = $1;
+                    $self->{curInfo}->{date} = $2;
+                }
+                else{
+                    $self->{curInfo}->{title} = $origtext;
+                }
+                $self->{insideTitle} = 0;
+                return;
+            }
+
+            # If the "Ampliar" (enlarge) link exists, switch from the
+            # thumbnail to the full-size picture by dropping the size suffix.
+            # The thumbnail URL is kept when it does not have that shape.
+            if ($self->{inside}->{a} && $origtext eq "Ampliar"){
+                if ($self->{curInfo}->{image} =~ /(http:\/\/img.alpacine.com\/carteles\/.*)-[0-9]*(\.jpg)/){
+                    $self->{curInfo}->{image} = $1 . $2;
+                }
+                return;
+            }
+            # We are on the actual rating: store it rounded to an integer
+            if($self->{insideRating}){
+                $self->{curInfo}->{ratingpress} = int( $origtext + 0.5 );
+                $self->{insideRating} = 0;
+            }
+            # There is no rating yet, use 0 by default
+            if($self->{inside}->{div}){
+                if($origtext =~ /Esperando \d votos/){
+                    $self->{curInfo}->{ratingpress} = 0;
+                }
+            }
+            # Original title
+            if ($self->{isOrigTit} eq 1) {
+                $self->{isOrigTit} = 0;
+                $self->{curInfo}->{original} = $origtext;
+                return;
+            }
+            # Genres (gen, gen, gen, gen...); an empty node ends the list
+            if ($self->{isGenres} eq 1) {
+                if($origtext ne ""){
+                    # the site's own commas are used as separators
+                    $self->{curInfo}->{genre} .= $origtext;
+                }
+                else{
+                    $self->{isGenres} = 0;
+                }
+                return;
+            }
+            # Country
+            if ($self->{isCountry} eq 1) {
+                $self->{isCountry} = 0;
+                $self->{curInfo}->{country} = $origtext;
+                return;
+            }
+            # Running time
+            if ($self->{isTime} eq 1) {
+                $self->{isTime} = 0;
+                $self->{curInfo}->{time} = $origtext;
+                return;
+            }
+            # Directors; an empty node ends the list
+            if ($self->{isDirector} eq 1) {
+                if($origtext ne ""){
+                    if($self->{curInfo}->{director} eq ""){
+                        $self->{curInfo}->{director} .= $origtext;
+                    }
+                    else{
+                        $self->{curInfo}->{director} .= ", $origtext";
+                    }
+                }
+                else{
+                    $self->{isDirector} = 0;
+                }
+                return;
+            }
+            # Actors; an empty node ends the list
+            if ($self->{isActors} eq 1) {
+                if($origtext ne ""){
+                    if($self->{curInfo}->{actors} eq ""){
+                        $self->{curInfo}->{actors} .= $origtext;
+                    }
+                    else{
+                        $self->{curInfo}->{actors} .= ", $origtext";
+                    }
+                }
+                else{
+                    $self->{isActors} = 0;
+                }
+                return;
+            }
+            # Synopsis
+            if ($self->{isSynopsis} eq 1) {
+                $self->{isSynopsis} = 0;
+                $self->{curInfo}->{synopsis} = $origtext;
+                return;
+            }
+            # Awards: appended to the synopsis; this is the last field read
+            if ($self->{isAwards} eq 1) {
+                $self->{isAwards} = 0;
+                $self->{curInfo}->{synopsis} = $self->{curInfo}->{synopsis}. "\n\nPremios:\n\t".$origtext;
+                $self->{insideInfos} = 0;
+                return;
+            }
+
+            # A label selects the field that the next text node will fill
+            if($self->{insideInfos}){
+                $self->{isOrigTit} = 1 if $origtext eq "Título original:";
+                $self->{isGenres} = 1 if $origtext eq "Género:";
+                $self->{isCountry} = 1 if $origtext eq "País:";
+                $self->{isTime} = 1 if $origtext eq "Duración:";
+                $self->{isDirector} = 1 if $origtext eq "Dirección:";
+                $self->{isActors} = 1 if $origtext eq "Interpretación:";
+                $self->{isSynopsis} = 1 if $origtext eq "Sinopsis:";
+                $self->{isAwards} = 1 if $origtext eq "Premios:";
+            }
+        }
+    }
+
+
+    # end
+    # Called each time a HTML tag ends.
+    # $tagname is the tag name.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+        $self->{inside}->{$tagname}--;
+
+        # Processing of the search results page
+        #if ($self->{parsingList}){
+        #}
+        # Processing of the selected film page
+        #else {
+        #}
+    }
+
+    # In processing functions below, self->{parsingList} can be used.
+    # If true, we are processing a search results page
+    # If false, we are processing a item information page.
+
+    # $self->{inside}->{tagname} (with correct value for tagname) can be used to test
+    # if we are in the corresponding tag.
+
+    # You have a counter $self->{itemIdx} that have to be used when processing search results.
+    # It is your responsibility to increment it!
+
+    # When processing search results, you have to fill the available fields for results
+    #
+    #  $self->{itemsList}[$self->{itemIdx}]->{field_name}
+    #
+    # When processing a movie page, you need to fill the fields (if available)
+    # in $self->{curInfo}.
+    #
+    #  $self->{curInfo}->{field_name}
+
+    # start
+    # Called each time a new HTML tag begins.
+    # $tagname is the tag name.
+    # $attr is reference to an associative array of tag attributes.
+    # $attrseq is an array reference containing all the attributes name.
+    # $origtext is the tag text as found in source file
+    # Returns nothing
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+        $self->{inside}->{$tagname}++;
+
+        # Processing of the search results page, to build the list
+        if ($self->{parsingList})
+        {
+            # Every <li> is a result, except the one with class "mas"
+            if( ($tagname eq "li" ) && ($attr->{class} ne "mas" )){
+                $self->{itemIdx}++;
+                $self->{insideInfos} = 1 ;
+                return;
+            }
+            if( ($tagname eq "li" ) && ($attr->{class} eq "mas" )){
+                $self->{insideInfos} = 0;
+                return;
+            }
+            # Inside a result, an <a> tag is the link to the film page
+            if ($tagname eq "a" && $self->{insideInfos}){
+                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.alpacine.com".$attr->{href};
+                return;
+            }
+        }
+        # Processing of the selected film page
+        else {
+            if ($tagname eq "h1"){
+                $self->{insideTitle} = 1;
+                return;
+            }
+            # An image whose src is a poster URL is used as picture
+            if ($tagname eq "img")
+            {
+                # Store the address of the thumbnail
+                if ($attr->{src} =~ /http:\/\/img.alpacine.com\/carteles\/.*\.jpg/)
+                {
+                    $self->{curInfo}->{image} = $attr->{src};
+                }
+                return;
+            }
+
+            if ($tagname eq "div" && $attr->{class} eq "voto"){
+                $self->{insideRating} = 1;
+                return;
+            }
+
+            if( $tagname eq "div" && $attr->{class} eq "datos" ){
+                $self->{insideInfos} = 1 ;
+                return;
+            }
+        }
+    }
+
+    # preProcess
+    # Called before each page is processed. You can use it to do some substitutions.
+    # $html is the page content.
+    # Returns modified version of page content.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        # Discard the page when it matches the "no results" pattern
+        if($html =~ /^.*No hay resultados para.*$/s){
+            $html = "";
+            return $html;
+        }
+
+        # Results page: keep only the list of films.
+        # With the /s modifier the page is handled as a single string and the
+        # whole content is replaced by the captured part.
+        if($html =~ s/^.*<div class="titulo">Pel.culas <span class="resultados">\([0-9]* resultado[s]?\)<\/span><\/div><ul>(<li><a.*<\/a> \([0-9]*\)<\/li>).*$/$1/s){
+            return $html;
+        }
+
+        # Film page: keep only the part holding the data.
+        # The cut depends on whether the page has an awards section.
+        if($html =~ /^.*<div class="titulo">Premios:.*más\.\.\.<\/a><\/div><\/div>.*$/s){
+            $html =~ s/^.*<div id="titulo">(.*<\/div><\/div>.*\n.*<div class="datox"><div class="titulo">Premios:.*)más\.\.\.<\/a><\/div><\/div>.*$/$1/s;
+        }
+        else{
+            $html =~ s/^.*<div id="titulo">(.*<\/div><\/div>)\n\n\t\t\t\t\n\n\t\t\t\t<hr \/>.*$/$1/s;
+        }
+        return $html;
+    }
+
+    # changeUrl
+    # Can be used to change URL if item URL and the one used to
+    # extract information are different.
+    # Return the modified URL.
+    #sub changeUrl
+    #{
+    #    my ($self, $url) = @_;
+    #    return $url;
+    #}
+
+    # getExtra
+    # Used if the plugin wants an extra column to be displayed in search results
+    # Return the column title or empty string to hide the column.
+    #sub getExtra
+    #{
+    #    return 'Extra';
+    #}
+
+
+    # getLang
+    # Used to fill in plugin list with user language plugins
+    # Return the language used for this site (2 letters code).
+    sub getLang
+    {
+        return "ES";
+    }
+
+
+    # getAuthor
+    # Used to display the plugin author in GUI.
+    # Returns the plugin author name.
+    sub getAuthor
+    {
+        return "DoVerMan";
+    }
+
+
+    # getName
+    # Used to display plugin name in GUI.
+    # Returns the plugin name.
+    sub getName
+    {
+        return 'Alpacine';
+    }
+
+
+    # getCharset
+    # Used to convert charset in web pages.
+    # Returns the charset as specified in pages.
+    #sub getCharset
+    #{
+    #    my $self = shift;
+    #    # Charset of the web site
+    #    return "UTF-8";
+    #}
+
+
+    # getItemUrl
+    # Used to get the full URL of an item page.
+    # Useful when url on results pages are relative.
+    # $url is the URL as found with a search.
+    # Returns the absolute URL (start already stores absolute URLs here).
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+        return $url;
+    }
+
+
+    # getSearchUrl
+    # Used to get the URL that to be used to perform searches.
+    # $word is the query
+    # Returns the full URL.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+        # Hack to avoid problems with accents: percent-encoded accented
+        # vowels are replaced by the plain letter
+        $word =~ s/%E1/a/g;
+        $word =~ s/%E9/e/g;
+        $word =~ s/%ED/i/g;
+        $word =~ s/%F3/o/g;
+        $word =~ s/%FA/u/g;
+        $word =~ s/%C1/A/g;
+        $word =~ s/%C9/E/g;
+        $word =~ s/%CD/I/g;
+        $word =~ s/%D3/O/g;
+        $word =~ s/%DA/U/g;
+
+        return "http://www.alpacine.com/buscar/?buscar=" . $word;
+
+    }
+
+
+    # Constructor
+    sub new
+    {
+        # Initialisation
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        # Fields returned by the plugin (1 yes, 0 no). They are the ones
+        # shown in the list of results
+        $self->{hasField} = {
+            title => 1,
+            date => 1,
+            director => 0,
+            actors => 0,
+        };
+
+        # Tells whether we are processing useful information
+        $self->{insideInfos} = 0;
+
+        # Parsing state flags
+        $self->{insideRating} = 0;
+        $self->{insideTitle} = 0;
+
+        $self->{isOrigTit} = 0;
+        $self->{isGenres} = 0;
+        $self->{isCountry} = 0;
+        $self->{isTime} = 0;
+        $self->{isDirector} = 0;
+        $self->{isActors} = 0;
+        $self->{isSynopsis} = 0;
+        $self->{isAwards} = 0;
+
+        return $self;
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAmazon.pm b/lib/gcstar/GCPlugins/GCfilms/GCAmazon.pm
new file mode 100644
index 0000000..8692a1b
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCAmazon.pm
@@ -0,0 +1,281 @@
+package GCPlugins::GCfilms::GCAmazon;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsAmazonCommon; + +{ + package GCPlugins::GCfilms::GCPluginAmazon; + + use base qw(GCPlugins::GCfilms::GCfilmsAmazonPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + if ($self->{parsingEnded}) + { + if ($self->{itemIdx} < 0) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq 'input') + { + $self->{beginParsing} = 1 + if $attr->{src} =~ /go-button-search/; + } + return if ! $self->{beginParsing}; + if ($tagname eq 'srtitle') + { + $self->{isTitle} = 1; + } + elsif ($tagname eq 'publication') + { + $self->{isPublication} = 1; + } + elsif ($tagname eq 'actors') + { + $self->{isActors} = 1; + } + if ($tagname eq 'a') + { + my $urlId; + if ($urlId = $self->isItemUrl($attr->{href})) + { + $self->{isTitle} = 2 if $self->{isTitle} eq '1'; + return if $self->{alreadyRetrieved}->{$urlId}; + $self->{alreadyRetrieved}->{$urlId} = 1; + $self->{currentRetrieved} = $urlId; + my $url = $attr->{href}; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $url; + } + } + } + else + { + if (($tagname eq "img") && (!$self->{curInfo}->{image})) + { + $self->{curInfo}->{image} = $self->extractImage($attr); + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'content')) + { + $self->{insideContent} = 1; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'productDescriptionWrapper')) + { + $self->{insideSynopsis} = 1 + if (!$self->{curInfo}->{synopsis}); + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'emptyClear')) + { + $self->{insideSynopsis} = 0; + } + elsif (($tagname eq "span") 
&& ($self->{insideAge}) && ($attr->{class} =~ /medSprite/)) + { + $attr->{class} =~ s/\s*$//; + $self->{curInfo}->{age} = 2 if ($attr->{class} =~ m/G$/); + $self->{curInfo}->{age} = 5 if ($attr->{class} =~ m/PG$/); + $self->{curInfo}->{age} = 13 if ($attr->{class} =~ m/PG13$/); + $self->{curInfo}->{age} = 18 if ($attr->{class} =~ m/R$/); + $self->{insideAge} = 0; + } + elsif ($tagname eq "span") + { + $self->{insideNameAndDate} = 1 if $attr->{id} eq "btAsinTitle"; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + if ($tagname eq "li") + { + $self->{insideActors} = 0; + $self->{insideDirector} = 0; + } + } + + sub text + { + my ($self, $origtext) = @_; + return if GCPlugins::GCstar::GCPluginAmazonCommon::text(@_); + return if length($origtext) < 2; + return if ($self->{parsingEnded}); + if ($self->{parsingList}) + { + return if ! $self->{beginParsing}; + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + return; + } + elsif ($self->{isPublication}) + { + $origtext =~ m/([0-9]{4})/; + $self->{itemsList}[$self->{itemIdx}]->{date} = $1; + $self->{isPublication} = 0; + return; + } + elsif ($self->{isActors}) + { + $origtext =~ s/^\s*//; + $origtext =~ s/\s*$//; + $self->{itemsList}[$self->{itemIdx}]->{actors} = $origtext + if ! 
$self->{itemsList}[$self->{itemIdx}]->{actors}; + $self->{isActors} = 0; + return; + } + } + else + { + $origtext =~ s/\s{2,}/ /g; + + if ($self->{insideNameAndDate}) + { + if ($origtext =~ m/(.*) \(([0-9]{4})\)/) + { + $self->{curInfo}->{title} = $1; + $self->{curInfo}->{date} = $2; + } + $self->{insideNameAndDate} = 0; + } + elsif (($self->{insideActors}) && ($origtext !~ /^,/)) + { + $origtext =~ s/^\s//; + $origtext =~ s/\s+,/,/; + if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS) + { + push @{$self->{curInfo}->{actors}}, [$origtext]; + $self->{actorsCounter}++; + } + } + elsif (($self->{insideDirector}) && ($origtext !~ /^,/)) + { + $origtext =~ s/^\s//; + $origtext =~ s/,.$//; + $self->{curInfo}->{director} .= ", " + if $self->{curInfo}->{director}; + $self->{curInfo}->{director} .= $origtext; + } + elsif ($self->{insideTime}) + { + $origtext =~ s/^\s//; + $origtext =~ s/\n//g; + $self->{curInfo}->{time} = $origtext; + $self->{insideTime} = 0; + } + elsif ($self->{insideGenre}) + { + $origtext =~ s/\s*$//; + $self->{curInfo}->{genre} = $origtext; + $self->{insideGenre} = 0; + } + elsif (($self->{insideSynopsis}) && ($origtext ne '')) + { + $self->{curInfo}->{synopsis} .= $origtext; + } + elsif ($self->{inside}->{b}) + { + $self->{insideActors} = 1 if $origtext =~ /Actors:/; + $self->{insideDirector} = 1 if $origtext =~ /Directors:/; + $self->{insideTime} = 1 if $origtext =~ /Run Time:/; + $self->{insideGenre} = 1 if $origtext =~ /Genre:/; + $self->{insideAge} = 1 if $origtext =~ /Rating:/; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 1, + }; + + $self->{suffix} = 'com'; + + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + $html = $self->SUPER::preProcess($html); + if 
($self->{parsingList}) + { + $self->{isItem} = 0; + $html =~ s|~(.*?)<span class="bindingBlock">\(<span class="binding">(.*?)</span>( - .*?[0-9]{4})?\)</span>|<actors>$1</actors><format>$2</format><publication>$3</publication>|gsm; + } + else + { + $html =~ s/(<i>|<\/i>)//gim; + $html =~ s/<p>/\n/gim; + $html =~ s|</p>|\n|gim; + $html =~ s/(<ul>|<\/ul>)/\n/gim; + $html =~ s/<li>([^<])/- $1/gim; + $html =~ s|([^>])</li>|$1\n|gim; + $html =~ s|<br ?/?>|\n|gi; + $html =~ s|<a href="/gp/imdb/[^"]*">(.*?)</a>|$1|gm; + $html =~ s|<a href="[^"]*search-alias=dvd&field-keywords=[^"]*">(.*?)</a>|$1|gm; + } + $self->{parsingEnded} = 0; + $self->{alreadyRetrieved} = {}; + $self->{beginParsing} = 0; + + return $html; + } + + sub getName + { + return "Amazon (US)"; + } + + sub getLang + { + return 'EN'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAmazonDE.pm b/lib/gcstar/GCPlugins/GCfilms/GCAmazonDE.pm new file mode 100644 index 0000000..fab2b9f --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAmazonDE.pm @@ -0,0 +1,291 @@ +package GCPlugins::GCfilms::GCAmazonDE; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsAmazonCommon; + +{ + package GCPlugins::GCfilms::GCPluginAmazonDE; + + use base qw(GCPlugins::GCfilms::GCfilmsAmazonPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + if ($self->{itemIdx} < 0) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq 'input') + { + $self->{beginParsing} = 1 + if $attr->{src} =~ /go-button-search/; + } + return if ! $self->{beginParsing}; + return if ! $self->{beginParsing}; + if ($tagname eq 'srtitle') + { + $self->{isTitle} = 1; + } + elsif ($tagname eq 'publication') + { + $self->{isPublication} = 1; + } + elsif ($tagname eq 'actors') + { + $self->{isActors} = 1; + } + if ($tagname eq 'a') + { + my $urlId; + if ($urlId = $self->isItemUrl($attr->{href})) + { + $self->{isTitle} = 2 if $self->{isTitle} eq '1'; + return if $self->{alreadyRetrieved}->{$urlId}; + $self->{alreadyRetrieved}->{$urlId} = 1; + $self->{currentRetrieved} = $urlId; + my $url = $attr->{href}; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $url; + } + } + } + else + { + if (($tagname eq "img") && (!$self->{curInfo}->{image})) + { + $self->{curInfo}->{image} = $self->extractImage($attr); + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'content')) + { + $self->{insideContent} = 1; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'productDescriptionWrapper')) + { + $self->{insideSynopsis} = 1 + if (!$self->{curInfo}->{synopsis}); + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'emptyClear')) + { + 
$self->{insideSynopsis} = 0; + } + elsif ($tagname eq "span") + { + $self->{insideNameAndDate} = 1 if $attr->{id} eq "btAsinTitle"; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + if ($tagname eq "li") + { + $self->{insideActors} = 0; + $self->{insideDirector} = 0; + } + } + + sub text + { + my ($self, $origtext) = @_; + + return if GCPlugins::GCstar::GCPluginAmazonCommon::text(@_); + return if length($origtext) < 2; + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + return if ! $self->{beginParsing}; + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + return; + } + elsif ($self->{isPublication}) + { + $origtext =~ m/([0-9]{4})/; + $self->{itemsList}[$self->{itemIdx}]->{date} = $1; + $self->{isPublication} = 0; + return; + } + elsif ($self->{isActors}) + { + $origtext =~ s/^\s*//; + $origtext =~ s/\s*$//; + $self->{itemsList}[$self->{itemIdx}]->{actors} = $origtext + if ! $self->{itemsList}[$self->{itemIdx}]->{actors}; + $self->{isActors} = 0; + return; + } + } + else + { + $origtext =~ s/\s{2,}//g; + if ($self->{insideNameAndDate}) + { + $self->{curInfo}->{title} = $origtext; + $self->{insideNameAndDate} = 0; + }
+ elsif (($self->{insideActors}) && ($origtext !~ /^,/)) + { + $origtext =~ s/^\s//; + $origtext =~ s/\s+,/,/; + if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS) + { + push @{$self->{curInfo}->{actors}}, [$origtext]; + $self->{actorsCounter}++; + } + }
+ elsif (($self->{insideDirector}) && ($origtext !~ /^,/)) + { + $origtext =~ s/^\s//; + $origtext =~ s/,.$//; + $self->{curInfo}->{director} .= ", " + if $self->{curInfo}->{director}; + $self->{curInfo}->{director} .= $origtext; + }
+ elsif ($self->{insideTime})
+ {
+ $origtext =~ s/^\s//;
+ $origtext =~ s/\n//g;
+ $self->{curInfo}->{time} = $origtext;
+ $self->{insideTime} = 0;
+ }
+ elsif ($self->{insideDate})
+ {
+ $origtext =~ s/^\s//;
+ $origtext =~ s/\n//g; + $origtext =~ s/\-$//;
+ $self->{curInfo}->{date} = $origtext;
+ $self->{insideDate} = 0;
+ }
+ elsif (($self->{insideSynopsis}) && ($origtext ne '')) + { + $self->{curInfo}->{synopsis} .= $origtext; + }
+ elsif ($self->{insideAudio})
+ {
+ $origtext =~ s/^\s//;
+ $self->{curInfo}->{audio} = $origtext;
+ $self->{insideAudio} = 0;
+ }
+ elsif ($self->{insideSubTitle})
+ {
+ $origtext =~ s/^\s//;
+ $self->{curInfo}->{subt} = $origtext;
+ $self->{insideSubTitle} = 0;
+ } + elsif ($self->{inside}->{b}) + { + $self->{insideActors} = 1 if $origtext =~ /Darsteller:/; + $self->{insideDirector} = 1 if $origtext =~ /Regisseur\(e\):/; + $self->{insideDate} = 1 if $origtext =~ /Erscheinungstermin:/; + $self->{insideTime} = 1 if $origtext =~ /Spieldauer:/; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 1, + }; + + $self->{suffix} = 'de'; + + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + $html = $self->SUPER::preProcess($html); + if ($self->{parsingList}) + { + $self->{isItem} = 0; + $html =~ s|~(.*?)<span class="bindingBlock">\(<span class="binding">(.*?)</span>( - .*?[0-9]{4})?\)</span>|<actors>$1</actors><format>$2</format><publication>$3</publication>|gsm; + + } + else + { + $html =~ s/(<i>|<\/i>)//gim; + $html =~ s/<p>/\n/gim; + $html =~ s|</p>|\n|gim; + $html =~ s/(<ul>|<\/ul>)/\n/gim; + $html =~ s/<li>([^<])/- $1/gim; + $html =~ s|([^>])</li>|$1\n|gim; + $html =~ s|<br ?/?>|\n|gi; + $html =~ s|<a href="/gp/imdb/[^"]*">(.*?)</a>|$1|gm; + $html =~ s|<a href="[^"]*search-alias=dvd&field-keywords=[^"]*">(.*?)</a>|$1|gm; + #" + $html =~ s/<a href="\/exec\/obidos\/search-handle-url\/index=dvd-de&field-(?:actor|director|keywords)=[^\/]*\/[-0-9]*">([^<]*)<\/a>/$1/gm; + } + + $self->{parsingEnded} = 0; + $self->{alreadyRetrieved} = {}; + $self->{beginParsing} = 1; + + return $html; + } + + sub getName + { + return "Amazon (DE)"; + } + + sub getLang + { + return 'DE'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAmazonFR.pm b/lib/gcstar/GCPlugins/GCfilms/GCAmazonFR.pm new file mode 100644 index 0000000..cbb6674 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAmazonFR.pm @@ -0,0 +1,304 @@ +package GCPlugins::GCfilms::GCAmazonFR;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCfilms::GCfilmsAmazonCommon;
+
+{
+ package GCPlugins::GCfilms::GCPluginAmazonFR;
+
+ use base qw(GCPlugins::GCfilms::GCfilmsAmazonPluginsBase);
+
+ # start
+ # Opening-tag handler (presumably registered as the HTML::Parser start
+ # callback by a parent class - confirm there).
+ # $tagname is the tag name and $attr a hash ref of its attributes;
+ # $attrseq and $origtext are accepted but not used.
+ # On a results page it records item URLs and arms the flags consumed by
+ # text; on an item page it records the cover image and arms the
+ # synopsis, age and title flags.
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     # Per-tag nesting depth; end decrements it again.
+     $self->{inside}->{$tagname}++;
+
+     if ($self->{parsingEnded})
+     {
+         # Nothing was collected: offer the loaded page itself as the item.
+         if ($self->{itemIdx} < 0)
+         {
+             $self->{itemsList}[0]->{url} = $self->{loadedUrl};
+             $self->{itemIdx} = 0;
+         }
+         return;
+     }
+
+     if ($self->{parsingList})
+     {
+         if (($tagname eq 'input') && ($attr->{src} =~ /go-button-search/))
+         {
+             $self->{beginParsing} = 1;
+         }
+         return unless $self->{beginParsing};
+
+         # <actors> and <publication> are injected by preProcess; <srtitle>
+         # presumably comes from the parent's preProcess - confirm there.
+         my %flagFor = (
+             srtitle     => 'isTitle',
+             publication => 'isPublication',
+             actors      => 'isActors',
+         );
+         $self->{$flagFor{$tagname}} = 1 if exists $flagFor{$tagname};
+
+         return unless $tagname eq 'a';
+         my $urlId = $self->isItemUrl($attr->{href});
+         return unless $urlId;
+
+         $self->{isTitle} = 2 if $self->{isTitle} eq '1';
+
+         # Each item is listed once, however many links point to it.
+         return if $self->{alreadyRetrieved}->{$urlId};
+         $self->{alreadyRetrieved}->{$urlId} = 1;
+         $self->{currentRetrieved} = $urlId;
+         $self->{itemIdx}++;
+         $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+         return;
+     }
+
+     # Item page.
+     if ($tagname eq 'img')
+     {
+         # Only the first image is kept.
+         $self->{curInfo}->{image} = $self->extractImage($attr)
+             unless $self->{curInfo}->{image};
+     }
+     elsif ($tagname eq 'div')
+     {
+         my $class = $attr->{class};
+         if ($class eq 'content')
+         {
+             $self->{insideContent} = 1;
+         }
+         elsif ($class eq 'productDescriptionWrapper')
+         {
+             # Do not reopen the synopsis once one has been stored.
+             $self->{insideSynopsis} = 1
+                 unless $self->{curInfo}->{synopsis};
+         }
+         elsif ($class eq 'emptyClear')
+         {
+             $self->{insideSynopsis} = 0;
+         }
+     }
+     elsif ($tagname eq 'span')
+     {
+         if (($self->{insideAge}) && ($attr->{class} =~ /medSprite/))
+         {
+             $attr->{class} =~ s/\s*$//;
+             # Order matters: a later rule overrides an earlier one, so a
+             # class ending in "PG" ends up as 5 although it also ends in "G".
+             my @ageRules = (
+                 [qr/G$/,    2],
+                 [qr/PG$/,   5],
+                 [qr/PG13$/, 13],
+                 [qr/R$/,    18],
+             );
+             for my $rule (@ageRules)
+             {
+                 my ($pattern, $age) = @$rule;
+                 $self->{curInfo}->{age} = $age if $attr->{class} =~ $pattern;
+             }
+             $self->{insideAge} = 0;
+         }
+         else
+         {
+             $self->{insideNameAndDate} = 1 if $attr->{id} eq "btAsinTitle";
+         }
+     }
+ }
+
+ # end
+ # Closing-tag handler. Undoes the depth count kept by start and, when a
+ # list item closes, stops the collection of actors and directors.
+ # $tagname is the name of the tag being closed.
+ sub end
+ {
+     my ($self, $tagname) = @_;
+
+     $self->{inside}->{$tagname}--;
+     return if $tagname ne "li";
+
+     # Both name lists are only gathered within a single <li>.
+     $self->{$_} = 0 for qw(insideActors insideDirector);
+ }
+
+ # text
+ # Text handler: receives one run of character data as $origtext
+ # (presumably as the HTML::Parser text callback - confirm in the parent
+ # class). Runs shorter than two characters are ignored.
+ # On a results page it fills title, date and actors of the current entry
+ # of itemsList, according to the flags armed by start. On an item page it
+ # fills curInfo, using the bold field labels to decide which value is
+ # expected next.
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     return if length($origtext) < 2;
+
+     if ($self->{parsingList})
+     {
+         return if ! $self->{beginParsing};
+         # A page <title> that does not start with "Amazon.fr" ends list
+         # parsing; start then offers the loaded URL as the only item.
+         if (($self->{inside}->{title})
+             && ($origtext !~ /^Amazon\.fr/))
+         {
+             $self->{parsingEnded} = 1;
+         }
+         if ($self->{isTitle})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+             $self->{isTitle} = 0;
+             return;
+         }
+         elsif ($self->{isPublication})
+         {
+             # Only store a year that was really found: after a failed
+             # match $1 still holds the capture of an earlier, unrelated
+             # match.
+             if ($origtext =~ m/([0-9]{4})/)
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{date} = $1;
+             }
+             $self->{isPublication} = 0;
+             return;
+         }
+         elsif ($self->{isActors})
+         {
+             $origtext =~ s/^\s*//;
+             $origtext =~ s/\s*$//;
+             # Keep the first actors string seen for this item.
+             $self->{itemsList}[$self->{itemIdx}]->{actors} = $origtext
+                 if ! $self->{itemsList}[$self->{itemIdx}]->{actors};
+             $self->{isActors} = 0;
+             return;
+         }
+     }
+     else
+     {
+         # Runs of two or more whitespace characters are removed outright
+         # (not replaced by a single space).
+         $origtext =~ s/\s{2,}//g;
+
+         if ($self->{insideNameAndDate})
+         {
+             $self->{curInfo}->{title} = $origtext;
+             $self->{insideNameAndDate} = 0;
+         }
+         elsif (($self->{insideActors}) && ($origtext !~ /^,/))
+         {
+             $origtext =~ s/^\s//;
+             $origtext =~ s/\s+,/,/;
+             # Each actor is stored as a one-element array ref, up to
+             # MAX_ACTORS entries.
+             if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS)
+             {
+                 push @{$self->{curInfo}->{actors}}, [$origtext];
+                 $self->{actorsCounter}++;
+             }
+         }
+         elsif (($self->{insideDirector}) && ($origtext !~ /^,/))
+         {
+             $origtext =~ s/^\s//;
+             $origtext =~ s/,.$//;
+             # Several directors are joined with ", ".
+             $self->{curInfo}->{director} .= ", "
+                 if $self->{curInfo}->{director};
+             $self->{curInfo}->{director} .= $origtext;
+         }
+         elsif ($self->{insideTime})
+         {
+             $origtext =~ s/^\s//;
+             $origtext =~ s/\n//g;
+             $self->{curInfo}->{time} = $origtext;
+             $self->{insideTime} = 0;
+         }
+         elsif ($self->{insideDate})
+         {
+             $origtext =~ s/^\s//;
+             $origtext =~ s/\n//g;
+             $origtext =~ s/\-$//;
+             $self->{curInfo}->{date} = $origtext;
+             $self->{insideDate} = 0;
+         }
+         elsif (($self->{insideSynopsis}) && ($origtext ne ''))
+         {
+             # The synopsis may arrive in several runs; start closes it.
+             $self->{curInfo}->{synopsis} .= $origtext;
+         }
+         elsif ($self->{insideAudio})
+         {
+             $origtext =~ s/^\s//;
+             $self->{curInfo}->{audio} = $origtext;
+             $self->{insideAudio} = 0;
+         }
+         elsif ($self->{insideSubTitle})
+         {
+             $origtext =~ s/^\s//;
+             $self->{curInfo}->{subt} = $origtext;
+             $self->{insideSubTitle} = 0;
+         }
+         elsif ($self->{inside}->{b})
+         {
+             # A bold label announces the value carried by a following
+             # text run. The '.' in two patterns stands in for an accented
+             # letter (presumably to stay independent of the page encoding).
+             $self->{insideActors} = 1 if $origtext =~ /Acteurs\s*:/;
+             $self->{insideDirector} = 1 if $origtext =~ /R.alisateurs?\s*:/;
+             $self->{insideDate} = 1 if $origtext =~ /Date de sortie/;
+             $self->{insideTime} = 1 if $origtext =~ /Dur.e\s*:/;
+             $self->{insideAudio} = 1 if $origtext =~ /Langue\s*:/;
+             $self->{insideSubTitle} = 1 if $origtext =~ /Sous-titres\s*:/;
+         }
+     }
+ }
+
+ # Constructor
+ # Builds the object through the parent class, then declares which fields
+ # the plugin returns for the results list (1 = yes, 0 = no) and sets the
+ # site suffix (presumably the Amazon domain suffix used by the parent
+ # class to build URLs - confirm there).
+ # May be called on a class name or on an existing object.
+ sub new
+ {
+     my ($proto) = @_;
+     my $class = ref($proto) ? ref($proto) : $proto;
+
+     my $self = bless($class->SUPER::new(), $class);
+
+     $self->{suffix} = 'fr';
+     $self->{hasField} = {
+         title    => 1,
+         date     => 1,
+         director => 0,
+         actors   => 1,
+     };
+
+     return $self;
+ }
+
+ # preProcess
+ # Rewrites a downloaded page before it is parsed and resets the per-page
+ # parsing state.
+ # $html is the page source; the rewritten source is returned.
+ sub preProcess
+ {
+     my ($self, $html) = @_;
+
+     # The parent class gets the first pass over the page.
+     $html = $self->SUPER::preProcess($html);
+
+     if (! $self->{parsingList})
+     {
+         # Item page: flatten simple markup to plain text and newlines, and
+         # unwrap some links so that their label reaches text as ordinary
+         # character data.
+         for ($html)
+         {
+             s/(<i>|<\/i>)//gim;
+             s/<p>/\n/gim;
+             s|</p>|\n|gim;
+             s/(<ul>|<\/ul>)/\n/gim;
+             s/<li>([^<])/- $1/gim;
+             s|([^>])</li>|$1\n|gim;
+             s|<br ?/?>|\n|gi;
+             s|<a href="/gp/imdb/[^"]*">(.*?)</a>|$1|gm;
+             # An earlier variant of this pattern also required a trailing
+             # "/<digits and dashes>" path segment before the closing quote.
+             s/<a href="\/exec\/obidos\/search-handle-url\/index=dvd-fr&field-(?:actor|director|keywords)=[^\/]*">([^<]*)<\/a>/$1/gm;
+             #"
+         }
+     }
+     else
+     {
+         # Results page: turn the "~actors (format - ... year)" fragment
+         # into pseudo tags that start and text recognise.
+         $self->{isItem} = 0;
+         $html =~ s|~(.*?)<span class="bindingBlock">\(<span class="binding">(.*?)</span>( - .*?[0-9]{4})?\)</span>|<actors>$1</actors><format>$2</format><publication>$3</publication>|gsm;
+     }
+
+     # Fresh state for the page about to be parsed; parsing is enabled from
+     # the first tag on.
+     $self->{beginParsing} = 1;
+     $self->{alreadyRetrieved} = {};
+     $self->{parsingEnded} = 0;
+
+     return $html;
+ }
+
+ # getName
+ # Returns the name of this plugin.
+ sub getName
+ {
+     my $name = "Amazon (FR)";
+     return $name;
+ }
+
+ # getLang
+ # Returns the language code of this plugin.
+ sub getLang
+ {
+     my $lang = 'FR';
+     return $lang;
+ }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAmazonUK.pm b/lib/gcstar/GCPlugins/GCfilms/GCAmazonUK.pm new file mode 100644 index 0000000..096bb08 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAmazonUK.pm @@ -0,0 +1,264 @@ +package GCPlugins::GCfilms::GCAmazonUK; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# Edited 2009 by FiXx +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsAmazonCommon; + +{ + package GCPlugins::GCfilms::GCPluginAmazonUK; + + use base qw(GCPlugins::GCfilms::GCfilmsAmazonPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + if ($self->{itemIdx} < 0) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + return; + } + + if ($self->{parsingList}) + { + if (($self->{beginParsing} eq 0) && ($tagname eq 'div') && ($attr->{id} eq 'Results')) + { + $self->{beginParsing} = 1; + } + if (($self->{beginParsing}) && ($tagname eq 'table') && ($attr->{class} eq 'pagnTable')) + { + $self->{beginParsing} = 0; + $self->{parsingEnded} = 1; + } + return if ! 
$self->{beginParsing}; + if ($tagname eq 'a') + { + if (($self->{isItem}) && ($self->{isUrl})) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{isUrl} = 0 ; + } + } + elsif (($tagname eq 'td') && ($attr->{class} eq 'dataColumn')) + { + $self->{isItem} = 1 ; + $self->{isUrl} = 1 ; + $self->{isName} = 1 ; + } + } + else + { + if ($tagname eq "img") + { + if (!$self->{curInfo}->{image}) + { + $self->{curInfo}->{image} = $self->extractImage($attr); + } + } + elsif ($tagname eq "span") + { + $self->{insideNameAndDate} = 1 if $attr->{id} eq "btAsinTitle"; + } + elsif (($tagname eq "div") && ($attr->{class} eq "productDescriptionWrapper")) + { + $self->{isSynopsis} = 1; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + if (($tagname eq 'li') && ($self->{insideActors})) + { + $self->{insideActors} = 0; + } + + } + + sub text + { + my ($self, $origtext) = @_; + + return if GCPlugins::GCstar::GCPluginAmazonCommon::text(@_); + return if length($origtext) < 2; + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + return if ! $self->{beginParsing}; + if ($self->{isName}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isName} = 0; + $self->{isItem} = 0; + $self->{inActors} = 1; + } + elsif ($self->{inActors} && $self->{inside}->{td}) + { + $origtext =~ s/^\W*//; + $self->{itemsList}[$self->{itemIdx}]->{actors} = $origtext + if ! 
$self->{itemsList}[$self->{itemIdx}]->{actors}; + $self->{inActors} = 0; + return; + } + } + else + { + $origtext =~ s/\s{2,}//g; + + if ($self->{insideNameAndDate}) + { + (my $year = $origtext) =~ s/.*\[([0-9]{4})\].*/$1/ ; + (my $title = $origtext) =~ s/^([^\[]*).*$/$1/ ; + $self->{curInfo}->{title} = $title; + $self->{curInfo}->{origtitle} = $title; + $self->{curInfo}->{date} = $year; + $self->{insideNameAndDate} = 0; + } + elsif (($self->{insideActors}) && $self->{inside}->{a}) + { + $origtext =~ s/^\s//; + $origtext =~ s/,.$//; + $self->{curInfo}->{actors} .= $origtext.', '; + } + elsif ($self->{insideAge}) + { + $origtext =~ m/([0-9]{1,2})/; + $self->{curInfo}->{age} = $1; + $self->{insideAge} = 0; + } + elsif ($self->{insideDirector}) + { + $origtext =~ s/^\s//; + $origtext =~ s/,.$//; + $self->{curInfo}->{director} = $origtext; + $self->{insideDirector} = 0; + } + elsif ($self->{insideTime}) + { + $origtext =~ s/^\s//; + $origtext =~ s/\n//g; + $self->{curInfo}->{time} = $origtext; + $self->{insideTime} = 0; + } + elsif ($self->{isSynopsis}) + { + $self->{curInfo}->{synopsis} = $origtext if ! 
$self->{hasSynopsis}; + $self->{isSynopsis} = 0; + $self->{hasSynopsis} = 1; + } + elsif ($self->{inside}->{b}) + { + $self->{insideActors} = 1 if $origtext =~ /Actors:/; + $self->{insideDirector} = 1 if $origtext =~ /Directors:/; + $self->{insideAge} = 1 if $origtext =~ /Classification:/; + $self->{insideTime} = 1 if $origtext =~ /Run Time:/; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + date => 0, + director => 0, + actors => 1, + }; + + $self->{suffix} = 'co.uk'; + + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + $html = $self->SUPER::preProcess($html); + if ($self->{parsingList}) + { + $self->{isItem} = 0; + } + else + { + $html =~ s/(<i>|<\/i>)//gim; + $html =~ s/<p>/\n/gim; + $html =~ s|</p>|\n|gim; + $html =~ s/(<ul>|<\/ul>)/\n/gim; + $html =~ s/<li>([^<])/- $1/gim; + $html =~ s|([^>])</li>|$1\n|gim; + $html =~ s|<br ?/?>|\n|gi; + $html =~ s|<a href="/gp/imdb/[^"]*">(.*?)</a>|$1|gm; + $html =~ s|<a href="[^"]*search-alias=dvd&field-keywords=[^"]*">(.*?)</a>|$1|gm; + + $html =~ s/<a href="\/exec\/obidos\/search-handle-url\/index=dvd&field-(?:actor|director|keywords)=[^\/]*\/[-0-9]*">([^<]*)<\/a>/$1/gm; + } + + $self->{parsingEnded} = 0; + $self->{alreadyRetrieved} = {}; + $self->{beginParsing} = 0; + + return $html; + } + + sub getName + { + return "Amazon (UK)"; + } + + sub getLang + { + return 'EN'; + } + + sub getAuthor + { + return 'Tian & FiXx'; + } + + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAniDB.pm b/lib/gcstar/GCPlugins/GCfilms/GCAniDB.pm new file mode 100644 index 0000000..1c62e7c --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAniDB.pm @@ -0,0 +1,279 @@ +package GCPlugins::GCfilms::GCAniDB; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of 
GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + package GCPlugins::GCfilms::GCPluginAniDB; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + if ($tagname eq 'a') + { + if ($attr->{href} =~ m/animedb\.pl\?show=animeatt&aid=([0-9]*)/) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = "animedb\.pl\?show=anime&aid=" . 
$1; + } + } + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq 'a') + { + if ($attr->{href} =~ m/animedb\.pl\?show=anime&aid=[0-9]*/) + { + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++ if ($self->{itemIdx} < 0) || ($attr->{href} ne $self->{itemsList}[$self->{itemIdx}]->{url}); + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + } + } + elsif ($tagname eq 'td') + { + $self->{isYear} = 1 if ($attr->{class} eq 'date year'); + } + elsif ($tagname eq 'h1') + { + $self->{insideHeadline} = 1; + } + } + else + { + if ($tagname eq 'img') + { + if ($attr->{src} =~ m/http\:\/\/img[0-9]\.anidb\.info\/pics\/anime\/[0-9]*\.jpg/) + { + $self->{curInfo}->{image} = $attr->{src} if !$self->{curInfo}->{image}; + } + } + elsif ($tagname eq 'p') + { + if ($attr->{class} eq 'desc') + { + $self->{insideSynopsis} = 1; + } + } + elsif ($tagname eq 'th') + { + $self->{isField} = 1 if $attr->{class} eq 'field'; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + $self->{insideSynopsis} = 0 if $tagname eq 'p'; + } + + sub text + { + my ($self, $origtext) = @_; + + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + if ($self->{insideHeadline}) + { + $self->{parsingEnded} = 1 if $origtext !~ m/Anime List - Search for:/; + $self->{insideHeadline} = 0; + } + + if ($self->{isMovie}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext + if ! $self->{itemsList}[$self->{itemIdx}]->{title}; + $self->{isMovie} = 0; + $self->{isInfo} = 1; + return; + } + elsif ($self->{isYear}) + { + $self->{itemsList}[$self->{itemIdx}]->{date} = $origtext;# if $origtext =~ m/^ [0-9]{4}(-[0-9]{4})? 
$/; + $self->{isYear} = 0; + } + } + else + { + if ($self->{insideSynopsis}) + { + $origtext =~ s/\s{2,}/ /g; + $self->{curInfo}->{synopsis} .= $origtext; + #$self->{curInfo}->{synopsis} =~ s|GCBRGC|<br>|g; + #$self->{curInfo}->{synopsis} =~ s/^\s*//; + $self->{insideSynopsis} = 0; + } +# elsif ($self->{inside}->{div}) +# { +# $self->{curInfo}->{title} = $1 if $origtext =~ m/Title: (.*) /; +# if ($origtext =~ m/(?:Jap. Kanji|English): (.*) /) +# { +# $self->{curInfo}->{original} = $1; +# } +# $self->{curInfo}->{date} = $1 if $origtext =~ m/Year: (.*)/; +# $self->{curInfo}->{director} = $1 if $origtext =~ m/Companies: (.*) /; +# if ($origtext =~ m/Genre: (.*)/) +# { +# $self->{curInfo}->{genre} = $1; +# $self->{curInfo}->{genre} =~ s/ - //; +# } +# } + elsif ($self->{isField}) + { + $self->{isTitle} = 1 if $origtext eq 'Title'; + $self->{isOrig} = 1 if $origtext =~ /kanji/i; + $self->{isYear} = 1 if $origtext eq 'Year'; + $self->{isGenre} = 1 if $origtext eq 'Genre'; + $self->{isField} = 0; + } + elsif ($self->{inside}->{td}) + { + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0; + } + elsif ($self->{isOrig}) + { + $self->{curInfo}->{original} = $origtext; + $self->{isOrig} = 0; + } + elsif ($self->{isYear}) + { + $self->{curInfo}->{date} = $origtext; + $self->{isYear} = 0; + } + elsif ($self->{isGenre}) + { + ($self->{curInfo}->{genre} = $origtext) =~ s/\s//g; + $self->{curInfo}->{genre} =~ s/-$//; + $self->{isGenre} = 0; + } + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + $self->{isField} = 0; + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + $html =~ s/<a 
href="animedb\.pl\?show=producer&prid=[0-9]*" title="[^"]*">([^<]*)<\/a>/$1/g; + $html =~ s/<a href="animedb\.pl\?show=genre" target="_blank">(Genre:)<\/a>/$1/g; + $html =~ s/<a href="animedb\.pl\?show=animelist&genid=[^"]*" title="[^"]*">([^<]*)<\/a>/$1/g; + $html =~ s/ - <a href="animedb\.pl\?show=search&do\.search=1(&search\.anime.genre.[0-9]*=on){1,}" title="search for other animes with all of these genres">\[similar\]<\/a> //; + #$html =~ s/<td> ([^:]*): <\/td>\s*<td> ([^<]*) ?<\/td>/<div>$1: $2<\/div>/g; + $html =~ s/<br \/>/\n/g; + $html =~ s/<b>Awards:<\/b><br><a href="[^"]*" target="_blank"><img src="[^"]*" border=0 alt="[^"]*" title="[^"]*"><\/a> <hr>//g; + + #Removed italic strings (useful for synopsis source) + $html =~ s|<i>(.*?)</i>|$1|g; + #Extract synopsis + #$html =~ s|<td>([^<]*?)</td>\s*?</tr>\s*?</table>\s*?<hr>|<div class="synopsis">$1</div>|ms; + + #Remove Headline tag + $html =~ s/>\W*?<!-- headline -->/>/; + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://anidb.info/perl-bin/animedb.pl?show=animelist&adb.search=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return 'http://anidb.info/perl-bin/' . $url; + } + + sub getName + { + return 'AniDB'; + } + + sub getAuthor + { + return 'MeV'; + } + + sub getLang + { + return 'EN'; + } + + sub getCharset + { + my $self = shift; + + return "UTF-8"; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAnimator.pm b/lib/gcstar/GCPlugins/GCfilms/GCAnimator.pm new file mode 100644 index 0000000..45704d0 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAnimator.pm @@ -0,0 +1,236 @@ +package GCPlugins::GCfilms::GCAnimator; + +################################################### +# +# Copyright 2005-2009 zserghei +# +# This file is part of GCstar. 
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;
use Encode qw(encode);

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Film plugin for www.animator.ru (Russian animation database).
    # The parser callbacks (start/text/end) communicate through flags
    # stored on $self; results go to $self->{itemsList} while a search
    # result list is parsed ($self->{parsingList} true) and to
    # $self->{curInfo} while a film page is parsed.
    package GCPlugins::GCfilms::GCPluginAnimator;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for each opening tag.
    # $tagname is the tag name, $attr a hash ref of its attributes.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
        $self->{inside}->{$tagname}++;
        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                my $url = $attr->{href};
                # Every link to a film page opens a new entry in the list;
                # its title is filled by the next text event.
                if ($url =~ m/\/db\/\?p\=show\_film/)
                {
                    $self->{isMovie} = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
            }
        }
        else
        {
            if ($tagname eq "td" && $attr->{class} eq "FilmName")
            {
                $self->{insideTitle} = 1;
            }
            elsif ($tagname eq "td" && $attr->{class} eq "FilmType")
            {
                # The same cell is scanned for both duration and year.
                $self->{insideTime} = 1;
                $self->{insideDate} = 1;
            }
            elsif ($tagname eq "td" && $attr->{class} eq "FilmComments")
            {
                $self->{insideSynopsis} = 1;
            }
            elsif ($tagname eq "img")
            {
                # Only the first candidate is kept: the image with id
                # "SlideShow" or one whose width attribute contains 3xx.
                $self->{curInfo}->{image} = $attr->{src}
                  if !$self->{curInfo}->{image}
                  && ($attr->{id} eq "SlideShow" || $attr->{width} =~ m/3\d{2}/);
                # Root-relative path: it already starts with "/", so only
                # the host is prepended (avoids "ru//path").
                $self->{curInfo}->{image} = "http://www.animator.ru" . $self->{curInfo}->{image}
                  if $self->{curInfo}->{image} =~ m/^\//;
                # "../" paths are resolved against the /db/ directory.
                $self->{curInfo}->{image} = "http://www.animator.ru/db/" . $self->{curInfo}->{image}
                  if $self->{curInfo}->{image} =~ m/^\.\.\//;
                $self->{insideImage} = 0;
            }
        }
    }

    # text
    # Called each time some plain text (between tags) is processed.
    # $origtext is the read text.
    sub text
    {
        my ($self, $origtext) = @_;
        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                my ($title, $date);
                # Expected form: «Title» (YEAR something); anything else
                # is stored verbatim as the title.
                if ($origtext =~ m/«(.*)»\s\(([0-9]*)\s.+\)/)
                {
                    ($title, $date) = ($1, $2);
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} = $title;
                    $self->{itemsList}[ $self->{itemIdx} ]->{date}  = $date;
                }
                else
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                }
                $self->{isMovie} = 0;
                return;
            }
        }
        else
        {
            utf8::decode($origtext);
            $origtext =~ s/^\s+//;
            $origtext =~ s/\s+$//;
            if ($self->{insideTitle})
            {
                # Drop one leading/trailing non-word character, then
                # normalise the case.
                $origtext =~ s/^\W//;
                $origtext =~ s/\W$//;
                $origtext = ucfirst(lc($origtext));
                $self->{curInfo}->{title} = $origtext;
                # Genre and audio language are constant for this site.
                $self->{curInfo}->{genre} = "Мультфильм";
                $self->{curInfo}->{audio} = "русский";
                $self->{insideTitle}      = 0;
            }
            elsif ($self->{insideDate})
            {
                # The flag stays set until a text holding the year shows up.
                if ($origtext =~ m/([0-9]+)\sг/)
                {
                    $self->{curInfo}->{date} = $1;
                    # The country is derived from the year.
                    if ($self->{curInfo}->{date} < 1992)
                    {
                        $self->{curInfo}->{country} = "СССР";
                    }
                    else
                    {
                        $self->{curInfo}->{country} = "Россия";
                    }
                    $self->{insideDate} = 0;
                }
            }
            elsif ($self->{insideDirector})
            {
                $self->{curInfo}->{director} = $origtext;
                $self->{insideDirector} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                # Successive synopsis chunks are separated by a newline.
                $self->{curInfo}->{synopsis} .=
                  $self->{curInfo}->{synopsis} ? "\n" . $origtext : $origtext;
                $self->{insideSynopsis} = 0;
            }
            # Checked independently of the chain above because the year
            # and the duration may sit in the same text.
            if ($self->{insideTime})
            {
                if ($origtext =~ m/,\s+([0-9]+)\s+мин/)
                {
                    $self->{curInfo}->{time} = $1;
                    $self->{insideTime} = 0;
                }
            }
            if ($self->{inside}->{td})
            {
                # A cell containing this label announces that the next
                # text is the director's name.
                $self->{insideDirector} = 1 if $origtext =~ m/режиссер/;
            }
        }
    }

    # end
    # Called for each closing tag; keeps the nesting counters balanced.
    sub end
    {
        my ($self, $tagname) = @_;
        $self->{inside}->{$tagname}--;
    }

    # new
    # Constructor. Declares which fields are available in result lists.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    sub getName
    {
        return "Animator";
    }

    sub getAuthor
    {
        return 'zserghei';
    }

    sub getLang
    {
        return 'RU';
    }

    sub getCharset
    {
        my $self = shift;
        return "KOI8-R";
#        return "Windows-1251";
    }

    # getSearchUrl
    # Returns the URL of the search page for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;
        return "http://www.animator.ru/db/?p=search&text=$word";
    }

    # getItemUrl
    # Turns the link stored for a list entry into a full URL.
    # Absolute URLs are returned untouched; leading slashes are dropped
    # before joining so the result never contains "ru//".
    sub getItemUrl
    {
        my ($self, $url) = @_;
        $url = '' if !defined $url;
        return $url if $url =~ m{^https?://}i;
        $url =~ s{^/+}{};
        return "http://www.animator.ru/" . $url;
    }

    # preProcess
    # Called with the raw page before parsing; returns the text to parse.
    sub preProcess
    {
        my ($self, $html) = @_;
        $self->{parsingEnded} = 0;
        # Character-for-character remapping of the Cyrillic alphabet.
        # NOTE(review): presumably compensates for the page being decoded
        # with the charset announced by getCharset - confirm.
        $html =~ tr
          {АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя}
          {юабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ};
        return $html;
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAnimeNfoA.pm b/lib/gcstar/GCPlugins/GCfilms/GCAnimeNfoA.pm
new file mode 100644
index 0000000..9347637
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCAnimeNfoA.pm
@@ -0,0 +1,266 @@
package GCPlugins::GCfilms::GCAnimeNfoA;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Film plugin for the anime section of www.animenfo.com.
    # Parser callbacks share state through flags on $self; list results
    # go to $self->{itemsList}, film details to $self->{curInfo}.
    package GCPlugins::GCfilms::GCPluginAnimeNfoA;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for each opening tag.
    # $tagname is the tag name, $attr a hash ref of its attributes.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                # A link to a title page opens a new list entry.
                if ($attr->{href} =~ m/animetitle,[0-9]*,[a-z]*,[a-z0-9_]*\.html/)
                {
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href};
                }
            }
            elsif ($tagname eq "td")
            {
                if ($attr->{class} eq "anime_info")
                {
                    # The next text is kept as the year only if it is
                    # exactly four digits (see text).
                    $self->{couldBeYear} = 1;
                }
            }
        }
        else
        {
            if ($tagname eq 'table')
            {
                if ($attr->{class} eq 'anime_info')
                {
                    $self->{insideInfos} = 1;
                }
            }
            elsif ($tagname eq 'img')
            {
                if ($attr->{class} eq 'float')
                {
                    $self->{curInfo}->{image} = 'http://www.animenfo.com/' . $attr->{src};
                }
            }
            elsif ($tagname eq 'a')
            {
                if ($attr->{href} =~ /animebygenre\.php\?genre=/)
                {
                    $self->{insideGenre} = 1;
                }
            }
        }
    }

    # end
    # Called for each closing tag; keeps the nesting counters balanced.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # text
    # Called each time some plain text (between tags) is processed.
    # $origtext is the read text.
    sub text
    {
        my ($self, $origtext) = @_;

        # Ignore one-character texts unless they end with a digit.
        return if (length($origtext) < 2) && ($origtext !~ /\d+$/);

        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext;
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
                return;
            }
            elsif ($self->{couldBeYear})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $origtext if $origtext =~ m/^[0-9]{4}$/;
                $self->{couldBeYear} = 0;
                return;
            }
        }
        else
        {
            if ($self->{insideInfos})
            {
                # A label arms the matching flag; the first text that is
                # not itself a label is then stored as that field's value.
                if ($origtext eq "Title")
                {
                    $self->{insideName} = 1;
                }
                elsif ($origtext eq "Japanese Title")
                {
                    $self->{insideOrig} = 1;
                }
                elsif ($origtext eq "Total Episodes")
                {
                    $self->{insideTime} = 1;
                }
                elsif ($origtext eq "Year Published")
                {
                    $self->{insideDate} = 1;
                }
                elsif ($origtext eq "Director")
                {
                    $self->{insideDirector} = 1;
                }
                elsif ($origtext eq "User Rating")
                {
                    $self->{insideRating} = 1;
                }
                elsif ($origtext =~ m/Description/)
                {
                    $self->{insideSynopsis} = 1;
                }
                elsif ($self->{insideName})
                {
                    $self->{curInfo}->{title} = $origtext;
                    $self->{insideName} = 0;
                }
                elsif ($self->{insideOrig})
                {
                    # "Official Site" is the following label: seeing it
                    # means the value was empty.
                    $self->{curInfo}->{original} = $origtext if $origtext ne "Official Site";
                    $self->{insideOrig} = 0;
                }
                elsif ($self->{insideTime})
                {
                    $origtext =~ s/^(\d+)\s*(.*)/$1 episodes $2/;
                    $self->{curInfo}->{time} .= $origtext;
                    $self->{insideTime} = 0;
                }
                elsif ($self->{insideGenre})
                {
                    $self->{curInfo}->{genre} .= $origtext . ',';
                    $self->{insideGenre} = 0;
                }
                elsif ($self->{insideDate})
                {
                    $self->{curInfo}->{date} = $origtext if $origtext =~ m/[0-9]{4}/;
                    $self->{insideDate} = 0;
                }
                elsif ($self->{insideDirector})
                {
                    # Same principle as for "Official Site" above.
                    $self->{curInfo}->{director} = $origtext if $origtext ne "US Distribution";
                    $self->{insideDirector} = 0;
                }
                elsif ($self->{insideRating})
                {
                    # Only store a rating when the text really has the
                    # "x.y/10.0" form; $1 is meaningless after a failed
                    # match.
                    if ($origtext =~ m|([\d\.]+)/10\.0|)
                    {
                        $self->{curInfo}->{ratingpress} = int($1 + 0.5);
                    }
                    $self->{insideRating} = 0;
                }
                elsif ($self->{insideSynopsis})
                {
                    $self->{curInfo}->{synopsis} = $origtext if !$self->{curInfo}->{synopsis};
                    $self->{insideSynopsis} = 0;
                }
            }
        }
    }

    # new
    # Constructor. Declares which fields are available in result lists.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Called with the raw page before parsing; returns the text to parse.
    # Flattens markup that would otherwise split a value over several
    # text events.
    sub preProcess
    {
        my ($self, $html) = @_;

        $html =~ s/<br \/>/\n/g;
        # /s lets the script body span several lines.
        $html =~ s/<script language='JavaScript'>.*?<\/script>//gs;
        $html =~ s|<i>([^<]*)</i>|$1|g;
        $html =~ s|\t||g;
        $html =~ s/<a onMouseOut='[^']*' onMouseOver='[^']*' href='animebygenre\.php\?genre=[0-9]*'>([^<]*)<\/a>/$1/g;
        $html =~ s/<a href='animebyyear\.php\?year=[0-9]{4}'>([0-9]{4})<\/a>/<font class='DefaultFont'>$1<\/font>/;

        return $html;
    }

    # getSearchUrl
    # Returns the URL of the search page for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.animenfo.com/search.php?option=keywords&queryin=anime_titles&query=$word";
    }

    # getItemUrl
    # Turns the link stored for a list entry into a full URL.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return 'http://www.animenfo.com/' . $url;
    }

    sub getName
    {
        return "AnimeNfo Anime";
    }

    sub getAuthor
    {
        return 'MeV';
    }

    sub getLang
    {
        return 'EN';
    }

    # NOTE(review): the field filled by this plugin is named 'original';
    # confirm that 'orig' is the identifier expected by the caller.
    sub getNotConverted
    {
        my $self = shift;
        return ['orig'];
    }
}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAnimeka.pm b/lib/gcstar/GCPlugins/GCfilms/GCAnimeka.pm
new file mode 100644
index 0000000..f4e3c77
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCAnimeka.pm
@@ -0,0 +1,295 @@
package GCPlugins::GCfilms::GCAnimeka;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

###################################
#                                 #
#     Plugin submitted by MeV     #
#                                 #
###################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    # Film plugin for www.animeka.com (French anime database).
    # Parser callbacks share state through flags on $self; list results
    # go to $self->{itemsList}, film details to $self->{curInfo}.
    package GCPlugins::GCfilms::GCPluginAnimeka;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for each opening tag.
    # $tagname is the tag name, $attr a hash ref of its attributes.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingEnded})
        {
            # The page was recognised as not being a result list; if it
            # carries the rating form, use the form target as the single
            # result.
            if (($tagname eq 'form')
                && ($attr->{name} eq 'form_note_serie')
                && (! $self->{itemsList}[0]->{url}))
            {
                $self->{itemIdx} = 0;
                $self->{itemsList}[0]->{url} = $attr->{action};
            }
            return;
        }

        if ($self->{parsingList})
        {
            if (($tagname eq "img")
                && ($attr->{class} eq "rechercheindeximg")
                && ($attr->{alt} eq "Animesindex"))
            {
                # Any header image other than the search one means this
                # is not a result list.
                $self->{parsingEnded} = 1 if $attr->{src} !~ /rechercheindex\.gif/;
            }

            if ($tagname eq "a")
            {
                if (($attr->{href} =~ /^\/animes\/detail\//))
                {
                    my $url = $attr->{href};
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[$self->{itemIdx}]->{url} = $url;
                }
            }
        }
        else
        {
            if ($tagname eq "img")
            {
                if ($attr->{class} eq "picture")
                {
                    $self->{curInfo}->{image} = "http://animeka.com" . $attr->{src};
                }
                elsif (($attr->{class} eq "animeslegendimg")
                    && ($attr->{src} =~ /^\/_distiller\/show_flag\.php\?id=/))
                {
                    # Each flag image adds its alt text to the country
                    # list, once.
                    if (!$self->{curInfo}->{country})
                    {
                        $self->{curInfo}->{country} = $attr->{alt};
                    }
                    # Plain substring test: the alt text comes from the
                    # page and must not be interpreted as a regex. An
                    # empty alt is found at offset 0 and thus never
                    # appended.
                    elsif (index($self->{curInfo}->{country}, $attr->{alt}) < 0)
                    {
                        $self->{curInfo}->{country} .= ", " . $attr->{alt};
                    }
                }
            }
            elsif ($tagname eq "td")
            {
                $self->{insideInfos} = 1 if $attr->{class} eq "animestxt";
                $self->{insideName}  = 1 if $attr->{class} eq "animestitle";
            }
            elsif ($tagname eq "div")
            {
                # These two div classes are produced by preProcess.
                $self->{insideSynopsis}  = 1 if $attr->{class} eq "synopsis";
                $self->{insideAlternate} = 1 if $attr->{class} eq "alternate";
            }
        }
    }

    # end
    # Called for each closing tag; keeps the nesting counters balanced.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # text
    # Called each time some plain text (between tags) is processed.
    # $origtext is the read text.
    sub text
    {
        my ($self, $origtext) = @_;

        return if ($self->{parsingEnded});

        return if length($origtext) < 2;

        if ($self->{parsingList})
        {
            if ($self->{inside}->{script})
            {
                # A script redirection is taken as the single result.
                if ($origtext =~ /document\.location\.href="(.*?)"/)
                {
                    $self->{itemIdx} = 0;
                    $self->{itemsList}[0]->{url} = $1;
                }
                return;
            }

            if ($self->{isMovie})
            {
                $self->{itemsList}[$self->{itemIdx}]->{"title"} = $origtext;
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
                return;
            }
            elsif ($self->{isYear})
            {
                # Keep only the year out of " : YYYY - <count> <FORMAT>".
                $origtext =~ s/ : ([0-9]{4}) - [0-9]*\s*[A-Z]*/$1/;
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $origtext;
                $self->{isYear} = 0;
            }
            elsif ($self->{inside}->{u})
            {
                $self->{isYear} = 1 if $origtext =~ /Ann.e \/ nombre et format/;
            }
        }
        else
        {
            $origtext =~ s/\s{2,}//g;

            if ($self->{insideInfos})
            {
                # Drop the trailing ", " left by preProcess when it
                # flattened bracketed links.
                $origtext =~ s/(.*), $/$1/;
                # In the patterns below "." stands for an accented or
                # special character of the label.
                if ($origtext =~ /TITRE ORIGINAL : (.*)/)
                {
                    $self->{curInfo}->{original} = $1;
                }
                elsif ($origtext =~ /AUTEUR(?:S)? : (.*)/)
                {
                    $self->{curInfo}->{director} = $self->capWord($1);
                }
                elsif (($origtext =~ /VOLUMES, TYPE . DUR.E : (.*)/)
                    || ($origtext =~ /TYPE . DUR.E : (.*)/))
                {
                    $self->{curInfo}->{time} = $self->capWord($1);
                }
                elsif ($origtext =~ /ANN.E DE PRODUCTION : (.*)/)
                {
                    $self->{curInfo}->{date} = $self->capWord($1);
                }
                elsif ($origtext =~ /GENRE(?:S)? :/)
                {
                    $origtext =~ s/(?:, )|(?: & )/,/g;
                    # The capture is only used when this second match
                    # succeeds; otherwise $1 would still hold a value
                    # from an earlier match.
                    if ($origtext =~ /GENRE(?:S)? : (.*)/)
                    {
                        $self->{curInfo}->{genre} = $self->capWord($1);
                    }
                }
                $self->{insideInfos} = 0;
            }
            elsif ($self->{insideName})
            {
                # "Title (YYYY)": the year part is optional.
                if ($origtext =~ /(.*?)( \(([0-9]{4})\))?$/)
                {
                    $self->{curInfo}->{title} = $1;
                    $self->{curInfo}->{date}  = $3;
                }
                $self->{insideName} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                # Undo the placeholders introduced by preProcess.
                $origtext =~ s/\[br\]/\n/g;
                $origtext =~ s/\[endline\]//g;
                $self->{curInfo}->{synopsis} = $origtext;
                $self->{insideSynopsis} = 0;
            }
            elsif ($self->{insideAlternate})
            {
                $origtext =~ s/\[br\]/\n/g;
                $origtext =~ s/\[endline\]//g;
                # The alternate title is only a fallback for the
                # original title.
                $self->{curInfo}->{original} = $origtext if ! $self->{curInfo}->{original};
                $self->{insideAlternate} = 0;
            }
        }
    }

    # new
    # Constructor. Declares which fields are available in result lists.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Called with the raw page before parsing; returns the text to parse.
    # Line breaks are replaced with placeholders so that the patterns
    # below and the text handler see each value as a single chunk.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;

        # Decode the two entities that would get in the way of the
        # patterns used in text.
        $html =~ s/&nbsp;/ /g;
        $html =~ s/&amp;/&/g;
        $html =~ s/<b>|<\/b>//g;
        $html =~ s/<i>|<\/i>//g;
        $html =~ s/<br \/>/\[br\]/g;
        $html =~ s/\n/\[endline\]/g;
        $html =~ s/<span style="background:#CBD1DD;">([^<]*)<\/span>/$1/g;
        # "[<a ...>name</a>] " becomes "name, ".
        $html =~ s/\[<a href="\/animes\/(?:studios|genres|pers)\/.*?\.html">([^<]*)<\/a>\] /$1, /g;
        $html =~ s/<a href="\/avis\/index.html"[^>]*>([^<]*)<\/a>/$1/g;
        # Wrap the synopsis and the alternate title in dedicated divs
        # that start can recognise.
        $html =~ s/<td [^>]*>Synopsis<\/td><\/tr><tr><td [^>]*><table [^>]*><tr><td [^>]*>(.*?)<\/td><\/tr><\/table><\/td>/<div class="synopsis">$1<\/div>/;
        $html =~ s/<td [^>]*>Titre alternatif<\/td><\/tr><tr><td [^>]*><table [^>]*><tr><td [^>]*>(.*?)<\/td><\/tr><\/table><\/td>/<div class="alternate">$1<\/div>/;

        return $html;
    }

    # getSearchUrl
    # Returns the URL of the search page for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.animeka.com/search/index.html?req=$word&zone_series=1&go_search=1&cat=search";
    }

    # getItemUrl
    # Turns the link stored for a list entry into a full URL.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return "http://www.animeka.com" . $url;
    }

    sub getName
    {
        return "Animeka.com";
    }

    sub getAuthor
    {
        return 'MeV';
    }

    sub getLang
    {
        return 'FR';
    }
    sub getCharset
    {
        return "ISO-8859-1";
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCBeyazPerde.pm b/lib/gcstar/GCPlugins/GCfilms/GCBeyazPerde.pm
new file mode 100644
index 0000000..e05ce80
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCBeyazPerde.pm
@@ -0,0 +1,340 @@
package GCPlugins::GCfilms::GCBeyazPerde;

###################################################
#
# Copyright 2007-2009 Zuencap
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Film plugin for beyazperde.mynet.com (Turkish film database).
    # Parser callbacks share state through flags on $self; list results
    # go to $self->{itemsList}, film details to $self->{curInfo}.
    package GCPlugins::GCfilms::GCPluginBeyazPerde;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for each opening tag.
    # $tagname is the tag name, $attr a hash ref of its attributes.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                # A film link with this class opens a new list entry and
                # starts the text counter used in text.
                if ($attr->{href} =~ /\/film\// && $attr->{class} eq "turuncucizgisiz_11_px")
                {
                    my $url = $attr->{href};
                    $self->{isMovie} = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
            }
        }
        else
        {
            if ($tagname eq "img")
            {
                if ($attr->{src} =~ /^\/images\/film\//)
                {
                    # Only the first film image is kept.
                    $self->{curInfo}->{image} = "http://beyazperde.mynet.com" . $attr->{src}
                      if !$self->{curInfo}->{image};
                }
            }
            elsif ($tagname eq "td")
            {
                # insideSynopsis: 1 = armed (set when the actors table
                # closes), 2 = inside the synopsis cell.
                if ($self->{insideSynopsis} == 1)
                {
                    $self->{insideSynopsis} = 2;
                }
            }
            elsif ($tagname eq "h1")
            {
                if ($attr->{class} eq "baslik_filmadi31")
                {
                    $self->{insideTitle} = 1;
                }
            }
            elsif ($tagname eq "h2")
            {
                if ($attr->{class} eq "baslik_filmadi32")
                {
                    # Value 2 marks the original title (see text).
                    $self->{insideTitle} = 2;
                }
            }
        }
    }

    # end
    # Called for each closing tag.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;

        if (!$self->{parsingList})
        {
            if ($tagname eq "table")
            {
                # The synopsis cell is expected after the actors table.
                if ($self->{insideActors})
                {
                    $self->{insideActors}   = 0;
                    $self->{insideSynopsis} = 1;
                }
                $self->{insideTime} = 0;
            }
            elsif ($tagname eq "td")
            {
                if ($self->{insideSynopsis} == 2)
                {
                    $self->{insideSynopsis} = 0;
                }
            }
        }
    }

    # text
    # Called each time some plain text (between tags) is processed.
    # $origtext is the read text.
    sub text
    {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        # Entities are decoded or dropped before the text is used.
        $origtext =~ s/&quot;/"/g;
        $origtext =~ s/(?:&#179;|³)/3/g;
        $origtext =~ s/&#[0-9]*;//g;
        $origtext =~ s/\n//g;

        if ($self->{parsingList})
        {
            # isMovie counts the texts seen since the film link; the
            # position tells which field the text belongs to.
            if ($self->{isMovie} == 0)
            {
                return;
            }
            elsif ($self->{isMovie} == 1)
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
            }
            elsif ($self->{isMovie} == 2)
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1 if $origtext =~ m/\(([0-9]*)\)/;
            }
            elsif ($self->{isMovie} == 5)
            {
                # The "Y:" label shifts the counter by one extra position.
                if ($origtext eq "Y:")
                {
                    $self->{isMovie}++;
                }
            }
            elsif ($self->{isMovie} == 7)
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext;
            }
            elsif ($self->{isMovie} == 9)
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $origtext;
                # -1 becomes 0 after the increment below: entry complete.
                $self->{isMovie} = -1;
            }

            $self->{isMovie}++;
            return;
        }
        else
        {
            if ($self->{insideGenre} && ($self->{inside}->{a}))
            {
                $self->{curInfo}->{genre} = $self->capWord($origtext);
                $self->{insideGenre} = 0;
            }
            elsif ($self->{insideDirector} && ($self->{inside}->{a}))
            {
                $self->{curInfo}->{director} = $origtext;
                $self->{insideDirector} = 0;
            }
            elsif ($self->{insideSynopsis} == 2)
            {
                ($self->{curInfo}->{synopsis} .= $origtext) =~ s/^\s*//;
            }
            elsif ($self->{insideTime})
            {
                # insideTime: 1 = waiting for the year link, 2 = waiting
                # for the country link, 3 = waiting for the duration.
                if ($self->{insideTime} == 1)
                {
                    if ($self->{inside}->{a})
                    {
                        $self->{curInfo}->{date} = $origtext;
                        $self->{insideTime}++;
                    }
                }
                elsif ($self->{insideTime} == 2)
                {
                    if ($self->{inside}->{a})
                    {
                        $self->{curInfo}->{country} = $origtext;
                        $self->{insideTime}++;
                    }
                }
                elsif ($origtext =~ / dk\./)
                {
                    $origtext =~ s/.*, (.*) dk\./$1 dk\./;
                    $self->{curInfo}->{time} = $origtext;
                    $self->{insideTime} = 0;
                }
            }
            elsif ($self->{insideActors})
            {
                if ($self->{inside}->{a})
                {
                    push @{$self->{curInfo}->{actors}}, [$origtext]
                      if ($self->{actorsCounter} <
                        $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
                    $self->{actorsCounter}++;
                }
                elsif ($self->{inside}->{font} && ($origtext =~ m/\((.*)\)/))
                {
                    # As we incremented it above, we have one more chance here to add a role
                    # Without <= we would skip the role for last actor
                    # The counter must be positive: a role seen before
                    # any actor would otherwise address element -1.
                    push @{$self->{curInfo}->{actors}->[$self->{actorsCounter}-1]}, $1
                      if ($self->{actorsCounter} > 0)
                      && ($self->{actorsCounter} <=
                        $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
                }
            }
            elsif ($self->{insideOtherTitles})
            {
                if ($origtext =~ m/(.*?) \(International.*/)
                {
                    $self->{curInfo}->{title} = $1;
                    $self->{insideOtherTitles} = 0;
                }
            }
            elsif ($self->{insideTitle} == 1)
            {
                $self->{curInfo}->{title} = $origtext;
                $self->{insideTitle} = 0;
            }
            elsif ($self->{insideTitle} == 2)
            {
                $self->{curInfo}->{original} = $origtext;
                $self->{insideTitle} = 0;
            }

            # Labels found in span elements arm the flag of the field
            # that follows them.
            if ($self->{inside}->{span})
            {
                $self->{insideDirector} = 1 if $origtext =~ m/Y\xf6netmen : /;
                $self->{insideGenre}    = 1 if $origtext eq "T\xfcr : ";
                $self->{insideTime}     = 1 if $origtext =~ m/Yapım/;
                $self->{insideActors}   = 1
                  if $origtext =~ m/Oyuncular/ || $origtext =~ m/Seslendirenler/;
                if ($origtext =~ m{SinePuan:\s+(\d+\,\d+)\s+})
                {
                    # The site uses a decimal comma.
                    my $rating = $1;
                    $rating =~ s/,/./;
                    $self->{curInfo}->{ratingpress} = int($rating + 0.5);
                }
            }

        }
    }

    # new
    # Constructor. Declares which fields are available in result lists.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 1,
            actors   => 1,
        };

        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Called with the raw page before parsing; returns the text to parse.
    sub preProcess
    {
        my ($self, $html) = @_;

        #Fix for character-encoding:
        $html =~ s/\x85/\.\.\./g;
        $html =~ s/\x92/'/g;
        $html =~ s/\x93/“/g;
        $html =~ s/\x94/”/g;

        # Attribute values written as "&quot;...&quot;" are turned into
        # single-quoted values holding literal double quotes.
        $html =~ s/"&quot;/'"/g;
        $html =~ s/&quot;"/"'/g;
        $html =~ s/&nbsp;/ /g;
        $html =~ s|</a></b><br>|</a><br>|;

        return $html;
    }

    # getSearchUrl
    # Returns the URL of the search page for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;
        return "http://beyazperde.mynet.com/arama.asp?kat=film&keyword=$word";
    }

    # getItemUrl
    # Links stored for list entries are used as they are; the site root
    # is returned when no link is given.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return $url if $url;
        return 'http://beyazperde.mynet.com/';
    }

    # convertCharset
    # Returns $value unchanged.
    sub convertCharset
    {
        my ($self, $value) = @_;
        return $value;
    }

    sub getName
    {
        return "Beyaz Perde";
    }

    sub getAuthor
    {
        return 'Zuencap';
    }

    sub getLang
    {
        return 'TR';
    }

    sub getCharset
    {
        my $self = shift;

        return "utf-8";
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCCartelesPeliculasES.pm b/lib/gcstar/GCPlugins/GCfilms/GCCartelesPeliculasES.pm
new file mode 100644
index 0000000..e0aff39
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCCartelesPeliculasES.pm
@@ -0,0 +1,351 @@
package GCPlugins::GCfilms::GCCartelesPeliculasES;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    package GCPlugins::GCfilms::GCPluginCartelesPeliculasES;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);


    # text
    # Called each time some plain text (between tags) is processed.
    # $origtext is the read text.
# In search-results mode the text inside an <h3> of the current entry is
# stored as the result title. In item mode the text is trimmed and stored
# into the field selected by the flags set on earlier calls.
# Texts shorter than two characters are ignored.
sub text
{
    my ($self, $origtext) = @_;
    return if length($origtext) < 2;

    # Search results page: only the title is taken from text nodes.
    if ($self->{parsingList})
    {
        if ($self->{inside}->{h3} && $self->{insideInfos})
        {
            $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
        }
        return;
    }

    # Item page: drop leading spaces, runs of two or more spaces and
    # trailing spaces.
    $origtext =~ s/^\s*|\s{2,}|\s*$//g;

    # Title
    if ($self->{insideTitle})
    {
        $self->{curInfo}->{title} = $origtext;
        $self->{insideTitle} = 0;
        return;
    }

    # Rating, written with a decimal comma on a 0-5 scale ("4,25").
    # The comma must become a decimal point before doing arithmetic,
    # otherwise Perl reads "4,25" as 4; any digit is a valid decimal.
    # Doubling rescales 0-5 to 0-10 (same as dividing by 5, times 10).
    if ($self->{inside}->{strong} && ($origtext =~ /([0-5]),([0-9][0-9])/))
    {
        $self->{curInfo}->{rating} = "$1.$2" * 2;
        return;
    }

    # Original title: keep what precedes the first comma.
    if ($self->{isOrigTit})
    {
        $self->{isOrigTit} = 0;
        # The next text holds year, country and duration.
        $self->{isOther} = 1;
        # Replace the first "," with "#", then cut from the first "#".
        $origtext =~ s/,/#/;
        $origtext =~ s/#.*//;
        $self->{curInfo}->{original} = $origtext;
        return;
    }

    # Year, country, duration. The flag stays set until a text with the
    # "a, b, c" shape shows up.
    if ($self->{isOther})
    {
        if ($origtext =~ /^(.*), (.*), (.*)$/)
        {
            $self->{isOther} = 0;
            $self->{curInfo}->{date} = $1;
            $self->{curInfo}->{country} = $2;
            $self->{curInfo}->{time} = $3;
        }
        return;
    }

    # Director(s)
    if ($self->{isDirector})
    {
        $self->{curInfo}->{director} = $origtext;
        $self->{isDirector} = 0;
        return;
    }

    # Actors (no return: falls through to the label check below)
    if ($self->{isActors})
    {
        $self->{curInfo}->{actors} = $origtext;
        $self->{isActors} = 0;
    }

    # Synopsis (no return: falls through to the label check below)
    if ($self->{isSynopsis})
    {
        $self->{curInfo}->{synopsis} = $origtext;
        $self->{isSynopsis} = 0;
    }

    # A label inside <p> selects the field the NEXT text will fill.
    if ($self->{inside}->{p})
    {
        $self->{isOrigTit} = 1 if $origtext eq "akas:";
        $self->{isDirector} = 1 if $origtext eq "Director:";
        $self->{isActors} = 1 if $origtext eq "Intérpretes:";
        $self->{isSynopsis} = 1 if $origtext eq "Sinopsis:";
        return;
    }
}


# end
# Called each time a HTML tag ends.
# $tagname is the tag name.
# Only maintains the per-tag nesting counter.
sub end
{
    my ($self, $tagname) = @_;
    $self->{inside}->{$tagname}--;
}

# In processing functions below, self->{parsingList} can be used.
# If true, we are processing a search results page
# If false, we are processing a item information page.

# $self->{inside}->{tagname} (with correct value for tagname) can be used to test
# if we are in the corresponding tag.

# You have a counter $self->{itemIdx} that has to be used when processing search results.
# It is your responsibility to increment it!

# When processing search results, you have to fill the available fields for results
#
# $self->{itemsList}[$self->{itemIdx}]->{field_name}
#
# When processing a movie page, you need to fill the fields (if available)
# in $self->{curInfo}.
#
# $self->{curInfo}->{field_name}

# start
# Called each time a new HTML tag begins.
# $tagname is the tag name.
# $attr is reference to an associative array of tag attributes.
# $attrseq is an array reference containing all the attributes name.
# $origtext is the tag text as found in source file
# Returns nothing
#
# Search-results mode: an <h3 class="entry-title"> opens a new result, the
# first <a> seen while that result is open supplies its URL, and a
# <div class="entry-summary"> closes it.
# Item mode: <h1> announces the title and the first thumbnail <img> whose
# path matches the gallery layout supplies the poster.
sub start
{
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
    $self->{inside}->{$tagname}++;

    if ($self->{parsingList})
    {
        # Marker that starts the information of one title: count a new result.
        if (($tagname eq "h3") && ($attr->{class} eq "entry-title"))
        {
            $self->{itemIdx}++;
            $self->{insideInfos} = 1;
            return;
        }

        # A link inside an open result is the link to the item page.
        if ($tagname eq "a" && $self->{insideInfos})
        {
            $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
            return;
        }

        # The summary block ends the part of the result we care about.
        if (($tagname eq "div") && ($attr->{class} eq "entry-summary") && $self->{insideInfos})
        {
            $self->{insideInfos} = 0;
            return;
        }
    }
    else
    {
        if ($tagname eq "h1")
        {
            $self->{insideTitle} = 1;
            return;
        }

        # Only look at images while no poster has been stored yet.
        # (Logical &&, not bitwise &: this is a boolean test.)
        if (($tagname eq "img") && !$self->{curInfo}->{image})
        {
            # Image paths on the site:
            #   Thumb:  http://www.cartelespeliculas.com/galeria/albums/003/thumbs_23p47303003.jpg
            #           ./../../galeria/albums/005/thumbs_23p43025005.jpg
            #   Normal: http://www.cartelespeliculas.com/galeria/albums/003/23p47303003.jpg
            # The full-size address is the thumbnail path without "thumbs_".
            if ($attr->{src} =~ /\.\/\.\.\/\.\.\/(galeria\/albums\/[0-9]*\/)thumbs_(.*)$/)
            {
                $self->{curInfo}->{image} = "http://www.cartelespeliculas.com/" . $1 . $2;
            }
            return;
        }
    }
}

# preProcess
# Called before each page is processed. You can use it to do some substitutions.
# $html is the page content.
# Returns modified version of page content.
# A "no results" page is emptied; otherwise the page is cut down to the
# results list or, when that layout is absent, to the item card. Pages
# matching neither layout are returned unchanged.
sub preProcess
{
    my ($self, $html) = @_;

    # Page matching the "nothing found" message: nothing to parse.
    return "" if $html =~ /^.*Lo sentimos, no se ha encontrado.*$/s;

    # With /s the whole page is one string, so each substitution replaces
    # the entire document with the captured part. The item-card pattern is
    # only tried when the results-list pattern did not match.
    $html =~ s/^.*<ul class="hfeed posts-default clearfix">(.*)\t<\/li>\n\t\t<\/ul>.*$/$1/s
        or $html =~ s/^.*<div id="content" class="section">\n\n\n\n\t\t(.*)<\/li>\n<\/ul>\n<\/div>.*$/$1/s;

    return $html;
}

# changeUrl
# Can be used to change URL if item URL and the one used to
# extract information are different.
# Return the modified URL.
#sub changeUrl
#{
#    my ($self, $url) = @_;
#    return $url;
#}

# getExtra
# Used if the plugin wants an extra column to be displayed in search results
# Return the column title or empty string to hide the column.
#sub getExtra
#{
#    return 'Extra';
#}


# getLang
# Used to fill in plugin list with user language plugins
# Return the language used for this site (2 letters code).
sub getLang { return "ES" }


# getAuthor
# Used to display the plugin author in GUI.
# Returns the plugin author name.
sub getAuthor { return "DoVerMan" }


# getName
# Used to display plugin name in GUI.
# Returns the plugin name.
sub getName { return "CartelesPeliculas" }


# getCharset
# Used to convert charset in web pages.
# Returns the charset as specified in pages.
#sub getCharset
#{
#    my $self = shift;
#    # Charset of the web site
#    return "UTF-8";
#}


# getItemUrl
# Used to get the full URL of an item page.
# Useful when url on results pages are relative.
# $url is the URL as found with a search.
# Returns the absolute URL.
# The URL is handed back exactly as received.
sub getItemUrl
{
    my $self = shift;
    my ($url) = @_;
    return $url;
}


# getSearchUrl
# Used to get the URL to be used to perform searches.
# $word is the query
# Returns the full URL.
sub getSearchUrl
{
    my $self = shift;
    my ($word) = @_;

    return "http://www.cartelespeliculas.com/wp/?s=$word";
}


# Constructor
# Builds the parent object, declares which fields the search results
# provide and clears every parsing flag.
sub new
{
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self = $class->SUPER::new();
    bless ($self, $class);

    # Fields returned by the plugin (1 yes, 0 no); these are the ones
    # shown in the results list.
    $self->{hasField} = {
        title => 1,
        date => 0,
        director => 0,
        actors => 0
    };

    # insideInfos: currently inside a useful part of the results list.
    # insideTitle: the next text is the title.
    # is*: the next text belongs to that field.
    $self->{$_} = 0
        for qw(insideInfos insideTitle isOther isTitle isOrigTit isDirector isActors isSynopsis);

    return $self;
}

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCCinemaClock.pm b/lib/gcstar/GCPlugins/GCfilms/GCCinemaClock.pm
new file mode 100644
index 0000000..26ce915
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCCinemaClock.pm
@@ -0,0 +1,271 @@
package GCPlugins::GCfilms::GCCinemaClock;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

###################################
#                                 #
#     Plugin submitted by MeV     #
#                                 #
###################################

use strict;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginCinemaClock;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Handler for an opening tag. List mode registers a result for each
    # link to a movie page; item mode picks the poster and arms the flags
    # for the <div>/<p> blocks produced by preProcess.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                if ($attr->{href} =~
/http\:\/\/www\.CinemaClock\.com\/aw\/crva\.aw\/p\.clock\/r\.que\/m\.Montreal\/j\.f\/i\./
                  )
                {
                    # Store the path only; getItemUrl adds the host back.
                    my $url = $attr->{href};
                    $url =~ s/http\:\/\/www\.CinemaClock\.com(.*)/$1/;
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
            }
        }
        else
        {
            if ($tagname eq "img")
            {
                # NOTE(review): every stored image is an absolute URL, so
                # this "^/images/dvd/" guard can never be false - presumably
                # it was meant to protect an already found DVD cover; confirm.
                if ($self->{curInfo}->{image} !~ /^\/images\/dvd\//)
                {
                    if ($attr->{src} =~ /^\/images\/dvd\/med\/(.*)\.gif/)
                    {
                        $self->{curInfo}->{image} =
                          "http://www.cinemaclock.com/images/dvd/" . $1 . ".jpg";
                    }
                    elsif ($attr->{src} =~ /^\/images\/dvd\//)
                    {
                        $self->{curInfo}->{image} =
                          "http://www.cinemaclock.com" . $attr->{src};
                    }
                    elsif ($attr->{src} =~ /^\/images\/posters\//)
                    {
                        $self->{curInfo}->{image} =
                          "http://www.cinemaclock.com" . $attr->{src};
                    }
                    elsif ($attr->{src} =~ /^\/images\//)
                    {
                        # Any other site image is only a fallback.
                        $self->{curInfo}->{image} =
                          "http://www.cinemaclock.com" . $attr->{src}
                          if !$self->{curInfo}->{image};
                    }
                }
            }
            elsif ($tagname eq "div")
            {
                $self->{insideInfos} = 1 if $attr->{class} eq "informations";
                $self->{insideName}  = 1 if $attr->{class} eq "movietitle";
            }
            elsif ($tagname eq "p")
            {
                $self->{insideSynopsis} = 1 if $attr->{style} eq "text-align: justify";
            }
        }
    }

    # end
    # Handler for a closing tag: only maintains the nesting counter.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # text
    # Handler for a text node (texts shorter than two characters are ignored).
    # List mode: the first text after a result link is the title, a later
    # "(yyyy)" is the year. Item mode: "Label: value" lines inside an
    # informations block are dispatched on their label.
    sub text
    {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{"title"} =
                  $self->capWord($origtext);
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
                return;
            }
            elsif ($origtext =~ /\(([0-9]{4})\)/)
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
            }
        }
        else
        {
            $origtext =~ s/\s{2,}//g;

            if ($self->{insideInfos})
            {
                # "." stands in for the accented letters of the labels.
                if ($origtext =~ /Ann.e\:.(.*)/)
                {
                    $self->{curInfo}->{date} = $1;
                }
                elsif ($origtext =~ /Pays\:.(.*)/)
                {
                    $self->{curInfo}->{country} = $1;
                }
                elsif ($origtext =~ /Genre\:.(.*)/)
                {
                    $self->{curInfo}->{genre} = $self->capWord($1);
                    $self->{curInfo}->{genre} =~ s/, /,/g;
                }
                elsif ($origtext =~ /Dur.e\:.(.*)/)
                {
                    $self->{curInfo}->{time} = $1;
                }
                elsif ($origtext =~ /R.alis..par\:.(.*)/)
                {
                    $self->{curInfo}->{director} = $1;
                }
                elsif ($origtext =~ /En.vedette\:.(.*)/)
                {
                    $self->{curInfo}->{actors} = $1;
                }
                elsif ($origtext =~ /Classement\:.(.*)/)
                {
                    # "G" maps to 2; a number in the text overrides it.
                    $self->{curInfo}->{age} = 2  if $origtext =~ /G/;
                    $self->{curInfo}->{age} = $1 if $origtext =~ /([0-9]+)/;
                }
                elsif ($origtext =~ /Guide.parental\:.(.*)/)
                {
                    # A parental guide raises a stored age of 2 to 5.
                    $self->{curInfo}->{age} = 5 if $self->{curInfo}->{age} == 2;
                }
                $self->{insideInfos} = 0;
            }
            elsif ($self->{insideName})
            {
                $origtext =~ s/"//g;
                $self->{curInfo}->{title} = $origtext;
                $self->{insideName} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                $self->{curInfo}->{synopsis} = $origtext;
                $self->{insideSynopsis} = 0;
            }
            elsif ($origtext =~ /Version fran.aise de(.*)/)
            {
                $self->{curInfo}->{original} = $1;
            }
        }
    }

    # Constructor: builds the parent object, declares which fields the
    # search results provide and resets the list-parsing state.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Rewrites the raw page before parsing: strips bold/font/span markup,
    # unwraps person links and wraps each "label / value" cell pair and the
    # movie title into <div> blocks that start() recognises.
    sub preProcess
    {
        my ($self, $html) = @_;

        #<<< keep perltidy away
        $html =~ s{<b>|</b>}{}g;
        $html =~ s{<a href="/aw/cpea\.aw/p\.clock/r\.que/m\.Montreal/j\.f/i\.[0-9]*/a\.[^"]*">([^<]*)</a>}
                  {$1}g;
        # The replacement used to begin with a stray "/" (left over from the
        # slash-delimited version kept below), which put a literal "/" text
        # node in front of every informations block.
        $html =~ s{<span class=arialb2>([^<]*)</span></td>[^<]*<td><span class=arial2>([^<]*)</span>}
                  {<div class="informations">$1$2</div>}g;
        $html =~ s{<span class=movietitle>([^<]*)</span>}
                  {<div class="movietitle">$1</div>};
        $html =~ s{<font color=[^>]*>|</font>|<span class=[^>]*>|</span>}
                  {}g;
        #>>>

#        $html =~ s/<a href="\/aw\/cpea\.aw\/p\.clock\/r\.que\/m\.Montreal\/j\.f\/i\.[0-9]*\/a\.[^"]*">([^<]*)<\/a>/$1/g;
#        $html =~ s/<span class=arialb2>([^<]*)<\/span><\/td>[^<]*<td><span class=arial2>([^<]*)<\/span>/<div class="informations">$1$2<\/div>/g;
#        $html =~ s/<span class=movietitle>([^<]*)<\/span>/<div class="movietitle">$1<\/div>/;
#        $html =~ s/<font color=[^>]*>|<\/font>|<span class=[^>]*>|<\/span>//g;

        return $html;
    }

    # Search URL for the query $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.cinemaclock.com/aw/csra.aw?"
          . "p=clock&r=que&m=Montreal&j=f&key=$word";
    }

    # Item URL: the stored path prefixed with the site host.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return "http://www.cinemaclock.com" . $url;
    }

    sub getName
    {
        return "CinemaClock.com";
    }

    sub getAuthor
    {
        return 'MeV';
    }

    sub getLang
    {
        return 'FR';
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCCinemotions.pm b/lib/gcstar/GCPlugins/GCfilms/GCCinemotions.pm
new file mode 100644
index 0000000..c90c91a
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCCinemotions.pm
@@ -0,0 +1,284 @@
package GCPlugins::GCfilms::GCCinemotions;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    package GCPlugins::GCfilms::GCPluginCinemotions;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Handler for an opening tag. List mode opens a result on each movie
    # link and takes it back when the link turns out to wrap a poster
    # image; item mode picks the poster and arms the field flags.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq 'a')
            {
                if (   ($attr->{href} =~ /^\/modules\/Films\/fiche\//)
                    && ($attr->{class} eq "link4"))
                {
                    my $link = $attr->{href};
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[$self->{itemIdx}]->{url} = $link;
                }
            }
            elsif (
                ($tagname eq 'img')
                && (   ($attr->{src} =~ /^\/data\/films\//)
                    || ($attr->{src} =~ /^\/modules\/Films\/img\/webpasdaffiche\.jpg/))
              )
            {
                # Poster (or "no poster" placeholder): undo the result count.
                $self->{isMovie} = 0;
                $self->{itemIdx}--;
            }
            elsif ($tagname eq 'font')
            {
                $self->{isInfo} = 1 if $attr->{class} eq 'link4dtext';
            }
        }
        elsif ($tagname eq 'img')
        {
            if (   ($attr->{src} =~ m|/data/films/|)
                && ($attr->{src} !~ m|/data/films/[^_]*_[0-9]{4}_[0-9]*\.jpg|)
                && ($attr->{width} == 150))
            {
                $self->{curInfo}->{image} = $attr->{src};

                # With bigPics set, drop the /h200/ path component.
                $self->{curInfo}->{image} =~ s/\/h200\//\//
                  if $self->{bigPics};
            }
        }
        elsif ($tagname eq 'font')
        {
            my $isSize2 = ($attr->{size} eq '2');

            $self->{insideOrig} = 1 if $attr->{class} eq 'titrevo_film';
            $self->{insideInfos} = 1
              if ($attr->{face} eq 'arial') && $isSize2;
            $self->{insideArtists} = 1
              if ($attr->{face} eq 'verdana,geneva,arial') && $isSize2;
            $self->{insideSynopsis} = 1
              if ($attr->{class} eq 'link6')
              && ($self->{inside}->{fieldset})
              && (!$self->{curInfo}->{synopsis});
        }
        elsif ($tagname eq 'h2')
        {
            $self->{insideOrig} = 1
              if $attr->{style} eq 'color: #333333; font-size:13px';
        }
        elsif ($tagname eq 'br')
        {
            # Line breaks inside the synopsis are kept as newlines.
            $self->{curInfo}->{synopsis} .= "\n" if $self->{insideSynopsis};
        }
    }

    # end
    # Handler for a closing tag; a closing </font> ends the synopsis.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
        $self->{insideSynopsis} = 0 if $tagname eq 'font';

    }

    # text
    # Handler for a text node (texts shorter than two characters are
    # ignored). Runs of whitespace are removed, and newlines too unless a
    # synopsis is being collected.
    sub text
    {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;
        $origtext =~ s/\s{2,}//g;
        $origtext =~ s/\n*//g if !$self->{insideSynopsis};

        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                # The title is the heading text following the result link.
                if (($self->{inside}->{h1}) || ($self->{inside}->{h2}))
                {
                    $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
                    $self->{isMovie} = 0;
                    $self->{isInfo}  = 1;
                    return;
                }
            }
            elsif ($self->{isInfo})
            {
                if (   ($origtext =~ /([0-9]{4}) - [0-9]*h[0-9]*/)
                    || ($origtext =~ /([0-9]{4}) - [0-9]* mn/))
                {
                    $self->{itemsList}[$self->{itemIdx}]->{date} = $1;
                }
                elsif ($origtext =~ /^\s*R.alisation : (.*)/)
                {
                    $self->{itemsList}[$self->{itemIdx}]->{director} = $1;
                }
                elsif ($origtext =~ /^\s*avec (.*)/)
                {
                    # The cast line is the last piece of a result.
                    $self->{itemsList}[$self->{itemIdx}]->{actors} = $1;
                    $self->{isInfo} = 0;    #$html =~ s|<br\s*/>|\n|g;

                }
            }
        }
        else
        {
            if ($self->{inside}->{h1}
                && !$self->{curInfo}->{title})
            {
                $self->{curInfo}->{title} = $origtext;
            }
            elsif ($self->{insideOrig})
            {
                $self->{curInfo}->{original} = $origtext
                  if !$self->{curInfo}->{original};
                $self->{insideOrig} = 0;
            }

            if ($self->{insideInfos})
            {
                # "year- country- genre[- duration]"
                if ($origtext =~ /([0-9]{4})- (.*?)- ([^-]*)(?:- (.*))?/)
                {
                    my ($year, $country, $genre, $length) = ($1, $2, $3, $4);
                    $country =~ s|/|, |g;
                    $genre   =~ s|/|,|g;

                    $self->{curInfo}->{date}    = $year;
                    $self->{curInfo}->{country} = $country;
                    $self->{curInfo}->{genre}   = $genre;
                    $self->{curInfo}->{time}    = $length;
                }
                $self->{insideInfos} = 0;
            }
            elsif ($self->{insideArtists})
            {
                if ($origtext =~ /R.alisation\s*:\s*(.*)/)
                {
                    $self->{curInfo}->{director} = $1
                      if !$self->{curInfo}->{director};
                }
                elsif ($origtext =~ /avec\s*:?\s*(.*)/i)
                {
                    if (!$self->{curInfo}->{actors})
                    {
                        # "Name (Role)" becomes "Name;Role".
                        $self->{curInfo}->{actors} = $1;
                        $self->{curInfo}->{actors} =~ s/\s*\(([^\)]*)\)\s*/;$1/g;
                    }
                }
                $self->{insideArtists} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                $self->{curInfo}->{synopsis} .= $origtext;
            }
        }
    }

    # Constructor: builds the parent object, declares which fields the
    # search results provide and resets the list-parsing state.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 1,
            actors   => 1,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Rewrites the raw page before parsing and returns the result.
    # NOTE(review): some patterns below (the space-for-space one and the
    # typographic characters) look like entity patterns decoded by the page
    # extraction - confirm against the upstream file.
    sub preProcess
    {
        my ($self, $html) = @_;

        # Markup clean-up.
        $html =~ s/<!--[^-]*-->//g;
        $html =~ s/<b>|<\/b>//g;
        $html =~ s/ / /g;
        $html =~ s/\\'//g;
        $html =~ s|<A HREF="/modules/Artistes/fiche/[0-9]*[^>]*>(.*?)</A>|$1|gi;
        $html =~ s/<font class=link_news_2>([^<]*)<\/font>/$1/gi;
        $html =~ s/<font class=link4dtext>([^<]*)<br>([^<]*)<\/TD>/<font class=link4dtext>$1 $2<\/font><\/TD>/gi;
        $html =~ s|<h1>Oops\!</h1>||gi;

        # Typographic characters replaced by plain equivalents.
        $html =~ s|\x{92}|'|gi;
        $html =~ s|’|'|gi;
        $html =~ s|•|*|gi;
        $html =~ s|œ|oe|gi;
        $html =~ s|…|...|gi;
        $html =~ s|\x{85}|...|gi;
        $html =~ s|\x{8C}|OE|gi;
        $html =~ s|\x{9C}|oe|gi;

        return $html;
    }

    # Search URL for the query $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.cinemotions.com/recherche/$word.html";
    }

    # Item URL: the stored path prefixed with the site host.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return 'http://www.cinemotions.com' . $url;
    }

    sub getName
    {
        return 'Cinemotions.com';
    }

    sub getAuthor
    {
        return 'MeV';
    }

    sub getLang
    {
        return 'FR';
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCCsfd.pm b/lib/gcstar/GCPlugins/GCfilms/GCCsfd.pm
new file mode 100644
index 0000000..ea84b45
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCCsfd.pm
@@ -0,0 +1,699 @@
# Replace SiteTemplate with your plugin name.
# The package name must exactly match the file name (.pm)
package GCPlugins::GCfilms::GCCsfd;

###################################################
#
# Copyright 2005-2009 Tian
# Copyright 2007,2011 Petr Gajdůšek <gajdusek.petr@centrum.cz>
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
#use warnings;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Replace SiteTemplate with your exporter name
    # It must be the same name as the one used for file and main package name
    package GCPlugins::GCfilms::GCPluginCsfd;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # getSearchCharset
    # Charset of search term
    sub getSearchCharset
    {
        return 'UTF-8';
    }

    # getSearchUrl
    # Used to get the URL to be used to perform searches.
    # $word is the query
    # Returns the full URL.
    sub getSearchUrl
    {
        my ($self, $word) = @_;
        return "http://www.csfd.cz/hledat/?q=$word";
    }

    # getItemUrl
    # Used to get the full URL of a movie page.
    # Useful when url on results pages are relative.
    # $url is the URL as found with a search.
    # Returns the absolute URL.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        # Anything that is not already absolute (http: or https:) is taken
        # as a path on the site. Previously only "http:" was recognised, so
        # an https URL would have had the host prepended to it.
        $url = "http://www.csfd.cz" . $url if ($url !~ /^https?:/);
        return $url;
    }

    # getCharset
    # Used to convert charset in web pages.
    # Returns the charset as specified in pages.
    #sub getCharset {
    #    my $self = shift;
    #
    #    return "UTF-8";
    #}

    # getName
    # Used to display plugin name in GUI.
    # Returns the plugin name.
    sub getName
    {
        return "CSFD.cz";
    }

    # getAuthor
    # Used to display the plugin author in GUI.
    # Returns the plugin author name.
    sub getAuthor
    {
        return 'Petr Gajdůšek';
    }

    # getLang
    # Used to fill in plugin list with user language plugins
    # Return the language used for this site (2 letters code).
    sub getLang
    {
        return 'CS';
    }

    # hasSearchYear
    # Used to hide year column in search results
    # Return 0 to hide column, 1 to show it.
    sub hasSearchYear
    {
        return 1;
    }

    # hasSearchDirector
    # Used to hide director column in search results
    # Return 0 to hide column, 1 to show it.
    sub hasSearchDirector
    {
        return 1;
    }

    # hasSearchActors
    # Used to hide actors column in search results
    # Return 0 to hide column, 1 to show it.
    sub hasSearchActors
    {
        return 1;
    }

    # getExtra
    # Used if the plugin wants an extra column to be displayed in search results
    # Return the column title or empty string to hide the column.
    sub getExtra
    {

        return 'Žánr';
    }

    # changeUrl
    # Can be used to change URL if movie URL and the one used to
    # extract information are different.
    # Return the modified URL.
    sub changeUrl
    {
        my ($self, $url) = @_;

        return $url;
    }

    # preProcess
    # Called before each page is processed. You can use it to do some substitutions.
    # $html is the page content.
    # Returns modified version of page content.
# Besides rewriting the page, this resets every parsing flag and counter
# used while the page is processed, so each page starts from a clean state.
sub preProcess
{
    my ($self, $html) = @_;

    $self->{parsingEnded} = 0;

    if ($self->{parsingList})
    {
        # Search results.
        #
        # There are two movie lists on the page:
        #   first, a detailed one (title, genre, origin country, year,
        #   directors, actors);
        #   second, a brief list of other movies (title, year).
        #
        # insideOtherMovies  in the brief list (movies without details)
        # isMovie            movie link seen; details follow unless in the brief list
        # insideDetails      details will follow: genre, origin, actors, directors, year
        # wasDetailsInfo     past the paragraph with genre, origin and date
        # insideDirectors / insideActors and their counters: people in the details
        # isYear             movie year
        $self->{$_} = 0
            for qw(insideOtherMovies isMovie insideDetails wasDetailsInfo
                   directorsCounter actorsCounter insideDirectors insideActors
                   isYear);
        $self->{$_} = undef for qw(directors actors);

        ## Preprocess

        # directors and actors: wrap the labelled lines into <div> blocks
        $html =~ s/\n\s*Režie:\s([^\n]*)/<div class="directors">$1<\/div>/g;
        $html =~ s/\n\s*Hrají:\s([^\n].*)/<div class="actors">$1<\/div>/g;
        # year: drop the parentheses
        $html =~ s/<span class="film-year">\(([0-9]+)\)<\/span>/<span class="film-year">$1<\/span>/g;
    }
    else
    {
        # Movie page.
        #
        # titles              other movie titles (not exported to GCstar)
        # isTitles            in the list containing the other titles
        # isOrigTitle         in the original title (title for the movie's origin country)
        # origTitle           original title (if not set during parsing it will be
        #                     set to the main title at the end)
        # insideInfo          inside details with country, date (year) and time (length)
        # insideCommentAuthor / awaitingComment / insideComment:
        #                     user comments; each consists of a commenter and the comment
        # insideDirectors / insideActors and their counters: people
        $self->{$_} = 0
            for qw(isTitles isOrigTitle titlesCounter insideGenre
                   awaitingSynopsis insideSynopsis insideInfo insideRating
                   insideCommentAuthor awaitingComment insideComment
                   insideDirectors insideActors directorsCounter actorsCounter);
        $self->{$_} = undef for qw(titles origTitle directors actors);

        ## Preprocess

        # remove <br /> and <br>
        $html =~ s/<br( \/)?>/\n/g;
        ## Synopsis
        # remove list bullet
        $html =~ s/<img src="http:\/\/img.csfd.cz\/sites\/web\/images\/common\/li.gif"[^>]*>//g;
        # remove hyperlink to user profile
        $html =~ s/( <span class="source[^\(]*\()<a[^>]*>([^<]*)<\/a>/$1uživatel $2/g;
        # remove <span></span> around synopsis source
        $html =~ s/ <span class="source[^\(]*\(([^\)]*)\)<\/span>/\n-- $1/g;
        $html =~ s/<div data-truncate="570">([^<]*)<\/div>/$1/g;
    }

    return $html;
}

# In processing functions below, self->{parsingList} can be used.
# If true, we are processing a search results page
# If false, we are processing a movie information page.

# $self->{inside}->{tagname} (with correct value for tagname) can be used to test
# if we are in the corresponding tag.

# You have a counter $self->{itemIdx} that has to be used when processing search results.
# It is your responsibility to increment it!

# When processing search results, you have to fill (if available) following fields:
#
# $self->{itemsList}[$self->{itemIdx}]->{title}
# $self->{itemsList}[$self->{itemIdx}]->{url}
# $self->{itemsList}[$self->{itemIdx}]->{actors}
# $self->{itemsList}[$self->{itemIdx}]->{director}
# $self->{itemsList}[$self->{itemIdx}]->{date}
# $self->{itemsList}[$self->{itemIdx}]->{extra}

# When processing a movie page, you need to fill the fields (if available) in $self->{curInfo}.
# They are:
#
# $self->{curInfo}->{title}
# $self->{curInfo}->{director}
# $self->{curInfo}->{original} (Original title)
# $self->{curInfo}->{actors}
# $self->{curInfo}->{genre} (Comma separated list of movie type)
# $self->{curInfo}->{country} (Movie Nationality or country)
# $self->{curInfo}->{date}
# $self->{curInfo}->{time}
# $self->{curInfo}->{synopsis}
# $self->{curInfo}->{image}
# $self->{curInfo}->{audio}
# $self->{curInfo}->{subt}
# $self->{curInfo}->{age} 0 : No information
#                         1 : Unrated
#                         2 : All audience
#                         5 : Parental Guidance
#                         >= 10 : Minimum age value

# start
# Called each time a new HTML tag begins.
# $tagname is the tag name.
# $attr is reference to an associative array of tag attributes.
# $attrseq is an array reference containing all the attributes name.
# $origtext is the tag text as found in source file
# Only raises state flags (and opens a new search result entry); the text
# and end callbacks consume them.
# Returns nothing
sub start
{
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
    $self->{inside}->{$tagname}++;

    return if $self->{parsingEnded};

    my $class = $attr->{class};

    if ($self->{parsingList})
    {
        # Brief list of other movies (entries there carry no details)
        $self->{insideOtherMovies} = 1
            if $tagname eq "ul" && $class eq "films others";

        if ($tagname eq "a" && $attr->{href} =~ m/\/film\/[0-9]+-.*/)
        {
            # Link to a movie page: open a new result entry
            $self->{isMovie} = 1;
            my $idx = ++$self->{itemIdx};
            $self->{itemsList}[$idx]->{url} = $attr->{href};
            $self->{insideDetails} = 1 unless $self->{insideOtherMovies} == 1;
            $self->{wasDetailsInfo} = 0;
        }
        elsif ($tagname eq "div")
        {
            # Markers inserted by preProcess around director/actor lines
            $self->{insideDirectors} = 1 if $class eq "directors";
            $self->{insideActors}    = 1 if $class eq "actors";
        }
        elsif ($tagname eq "span")
        {
            # Year
            $self->{isYear} = 1 if $class eq "film-year";
        }
    }
    else
    {
        if ($tagname eq "div")
        {
            # Synopsis container, only once its heading has been seen
            if ($class eq "content" && $self->{awaitingSynopsis})
            {
                $self->{insideSynopsis}   = 1;
                $self->{awaitingSynopsis} = 0;
            }
        }
        elsif ($tagname eq "img")
        {
            # Poster
            $self->{curInfo}->{image} = $attr->{src}
                if $attr->{src} =~ /^http:\/\/img\.csfd\.cz\/posters\//;

            # Flag images in the titles list tell which title follows
            if ($self->{isTitles})
            {
                $self->{isOrigTitle} = 1 if ($attr->{alt} !~ /název$/);
                $self->{isSKTitle}   = 1 if ($attr->{alt} =~ /SK název$/);
            }
        }
        elsif ($tagname eq "ul")
        {
            # Original name and other names
            $self->{isTitles} = 1 if $class eq "names";
        }
        elsif ($tagname eq "p")
        {
            # Genre
            $self->{insideGenre} = 1 if $class eq "genre";

            # Info (country, date, time = duration)
            $self->{insideInfo} = 1 if $class eq "origin";

            # Comment body, expected right after a comment author
            if ($self->{awaitingComment} && $class eq "post")
            {
                $self->{awaitingComment} = 0;
                $self->{insideComment}   = 1;
            }
        }
        elsif ($tagname eq "h2")
        {
            # Rating
            $self->{insideRating} = 1 if $class eq "average";
        }
        elsif ($tagname eq "h5")
        {
            # Comment author
            $self->{insideCommentAuthor} = 1 if $class eq "author";
        }
    }
}

# end
# Called each time a HTML tag ends.
# $tagname is the tag name.
# end
# Closes the state opened by start/text when a tag ends.
# $tagname is the tag name.
# On a search results page a closing </div> stores the collected directors
# and actors into the current result entry; on a movie page it stores them
# into $self->{curInfo}.
# Returns nothing
sub end
{
    my ($self, $tagname) = @_;
    $self->{inside}->{$tagname}--;

    if ($self->{parsingList})
    {
        if ($tagname eq "div")
        {
            # movie details
            $self->{insideDetails} = 0 if $self->{insideDetails};

            # directors and actors
            # The "|| []" guards matter: when no name was collected the slot
            # may be undef, and @{undef} dies under "use strict".
            if ($self->{insideDirectors})
            {
                $self->{insideDirectors} = 0;
                $self->{itemsList}[ $self->{itemIdx} ]->{director} =
                    join(', ', @{ $self->{directors} || [] });
                $self->{directors}        = [];
                $self->{directorsCounter} = 0;
            }
            if ($self->{insideActors})
            {
                $self->{insideActors} = 0;
                $self->{itemsList}[ $self->{itemIdx} ]->{actors} =
                    join(', ', @{ $self->{actors} || [] });
                $self->{actors}        = [];
                $self->{actorsCounter} = 0;
            }
        }
    }
    else
    {
        # Synopsis
        $self->{insideSynopsis} = 0 if ($tagname eq "div");

        # Titles
        if ($tagname eq "ul" and $self->{isTitles})
        {
            $self->{isTitles} = 0;
        }

        # End of page: fall back to the main title as original title
        if ($tagname eq "body")
        {
            $self->{curInfo}->{original} ||= $self->{curInfo}->{title};
        }

        # Actors
        if ($tagname eq "div" and $self->{insideActors})
        {
            $self->{curInfo}->{actors} = join(', ', @{ $self->{actors} || [] });
            $self->{insideActors} = 0;
        }

        # Directors
        if ($tagname eq "div" and $self->{insideDirectors})
        {
            $self->{curInfo}->{director} = join(', ', @{ $self->{directors} || [] });
            $self->{insideDirectors} = 0;
        }

        # Comment
        $self->{insideCommentAuthor} = 0
            if ($tagname eq "h5" and $self->{insideCommentAuthor});

        # NOTE(review): nothing in the visible code sets isComment (the
        # other callbacks use insideComment/awaitingComment), so this
        # branch looks unreachable -- confirm before relying on it.
        if ($tagname eq "li" and $self->{isComment})
        {
            $self->{curInfo}->{comment} .= "\n";
            $self->{isComment} = 0;
        }

        # Debug
        if ($tagname eq "body" and $self->{debug})
        {
            use Data::Dumper;
            print Dumper $self->{curInfo};
        }
    }
}

# text
# Called each time some plain text (between tags) is processed.
# $origtext is the read text.
sub text
{
    my ($self, $origtext) = @_;

    # Texts shorter than two characters are ignored; the rest is trimmed.
    return if length($origtext) < 2;
    $origtext =~ s/^\s+|\s+$//g;

    return if ($self->{parsingEnded});

    if (!$self->{parsingList})
    {
        # ---- Movie page ----
        # The checks below are independent and run in this order; some of
        # them rewrite $origtext for the ones that follow.

        # Movie titles: first <h1> text wins
        if ($self->{inside}->{h1} && !$self->{curInfo}->{title})
        {
            $self->{curInfo}->{title} = $origtext;
        }
        if ($self->{inside}->{h3} && $self->{isTitles})
        {
            my $slot = ++$self->{titlesCounter};
            $self->{titles}[$slot] = $origtext;
            if ($self->{isOrigTitle})
            {
                $self->{curInfo}->{original} ||= $origtext;
                $self->{isOrigTitle} = 0;
            }
            if ($self->{isSKTitle} && $self->{lang} eq "SK")
            {
                $self->{curInfo}->{title} = $origtext;
                $self->{isSKTitle} = 0;
            }
        }

        # Genre: " / " separated on the page, comma separated in the result
        if ($self->{insideGenre})
        {
            $origtext =~ s/ \/ /,/g;
            $self->{curInfo}->{genre} = $origtext;
            $self->{insideGenre} = 0;
        }

        # Extra movie info: country, date (year), time
        if ($self->{insideInfo})
        {
            my ($origin, $released, $length) = split(', ', $origtext);
            $origin =~ s/ \/ /,/g;

            $self->{curInfo}->{country} = $origin;
            $self->{curInfo}->{date}    = $released;
            $self->{curInfo}->{time}    = $length;

            $self->{insideInfo} = 0;
        }

        # Directors and Actors: an <h4> heading announces which names follow
        if ($self->{inside}->{h4})
        {
            $self->{insideDirectors} = 1 if ($origtext =~ /^Režie:/);
            $self->{insideActors}    = 1 if ($origtext =~ /^Hrají:/);
        }

        if ($self->{inside}->{a} && $self->{insideDirectors})
        {
            push @{$self->{directors}}, $origtext;
            $self->{directorsCounter}++;
        }
        if ($self->{inside}->{a} && $self->{insideActors})
        {
            #push @{$self->{curInfo}->{actors}}, [$origtext]
            #  if ($self->{actorsCounter} <
            #      $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
            #$self->{actorsCounter}++;
            push @{$self->{actors}}, $origtext;
            $self->{actorsCounter}++;
        }

        # Synopsis: the "Obsah" heading announces it, list items carry it
        if ($self->{inside}->{h3} && $origtext eq "Obsah")
        {
            $self->{awaitingSynopsis} = 1;
        }
        if ($self->{inside}->{li} && $self->{insideSynopsis})
        {
            $self->{curInfo}->{synopsis} .= $origtext . "\n\n\n";
        }

        # Rating: percentage on the page, rounded to a 0..10 scale
        if ($self->{insideRating})
        {
            $origtext =~ s/([0-9]+)%/$1/;
            if ($origtext ne "")
            {
                $self->{curInfo}->{ratingpress} = int($origtext / 10 + .5);
            }
            $self->{insideRating} = 0;
        }

        # Comments: author first, then the comment body
        if ($self->{inside}->{a} && $self->{insideCommentAuthor})
        {
            $self->{curInfo}->{comment} .= $origtext . " napsal(a):\n";
            $self->{awaitingComment} = 1;
        }
        if ($self->{insideComment})
        {
            $self->{curInfo}->{comment} .= $origtext . "\n\n";
            $self->{insideComment} = 0;
        }
    }
    else
    {
        # ---- Search results ----

        # An <h1> that is not the search heading means we were sent
        # straight to a movie page: report it as the single result.
        if ($self->{inside}->{h1} && $origtext !~ m/Vyhledávání/i)
        {
            $self->{parsingEnded} = 1;
            $self->{itemIdx}      = 0;
            $self->{itemsList}[0]->{url} = $self->{loadedUrl};
        }

        if ($self->{isMovie})
        {
            # Movie title
            $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext;
            $self->{isMovie} = 0;
            return;
        }
        elsif ($self->{isYear})
        {
            # Date (year)
            $self->{itemsList}[ $self->{itemIdx} ]->{"date"} = $origtext;
            $self->{isYear} = 0;
        }
        elsif ($self->{inside}->{p}
            && $self->{insideDetails}
            && $self->{wasDetailsInfo} == 0)
        {
            # Extra movie info, read from the end: genre, origin, date
            my @parts = split(', ', $origtext);
            my $last  = $#parts;

            my $year    = ($parts[$last] =~ /^\d+$/) ? $parts[$last] : undef;
            my $country = ($last - 1 >= 0) ? $parts[ $last - 1 ] : undef;
            my $genre   = ($last - 2 >= 0) ? $parts[ $last - 2 ] : undef;

            $self->{itemsList}[ $self->{itemIdx} ]->{date} = $year
                if (defined $year);
            $self->{itemsList}[ $self->{itemIdx} ]->{country} = $country
                if (defined $country);
            $self->{itemsList}[ $self->{itemIdx} ]->{extra} = $genre
                if (defined $genre);
            $self->{wasDetailsInfo} = 1;
        }
        elsif ($self->{inside}->{a} && $self->{insideDirectors})
        {
            # Directors
            push @{$self->{directors}}, $origtext;
            $self->{directorsCounter}++;
        }
        elsif ($self->{inside}->{a} && $self->{insideActors})
        {
            # Actors
            push @{$self->{actors}}, $origtext;
            $self->{actorsCounter}++;
        }
    }
}

# new
# Constructor.
# Returns object reference.
# The object is built by the parent class, re-blessed into the requested
# class and then given this plugin's defaults.
sub new
{
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();

    # Do your init stuff here

    bless($self, $class);

    # Fields this plugin fills in for search results
    my %hasField = map { $_ => 1 } qw(title date director actors country);
    $self->{hasField} = \%hasField;

    $self->{lang} = "CS";

    $self->{curName} = undef;
    $self->{curUrl}  = undef;

    # Debug output is switched on through the environment
    $self->{debug} = ($ENV{GCS_DEBUG_PLUGIN_PHASE} > 0);

    return $self;
}

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCCulturalia.pm b/lib/gcstar/GCPlugins/GCfilms/GCCulturalia.pm new file mode 100644 index 0000000..55c6692 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCCulturalia.pm @@ -0,0 +1,241 @@
package GCPlugins::GCfilms::GCCulturalia;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

###################################
#                                 #
#     Plugin submitted by MeV     #
#                                 #
###################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    package GCPlugins::GCfilms::GCPluginCulturalia;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for each opening tag.
    # Search results: a link to "../art/ver.php?art=..." opens a new entry.
    # Movie page: picks up the cover image and flags the title/info fonts.
    sub start {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList}) {
            return unless $tagname eq "a";
            return unless $attr->{href} =~ /^\.\.\/art\/ver\.php\?art=/;

            my $link = $attr->{href};
            $self->{isMovie} = 1;
            $self->{isInfo}  = 1;
            $self->{itemIdx}++;
            $self->{itemsList}[$self->{itemIdx}]->{url} = $link;
            return;
        }

        if ($tagname eq "img") {
            # Cover: the relative path is turned into an absolute URL
            if ($attr->{src} =~ /\.\.\/(imatges\/articulos\/[0-9]*-1\.jpg)/) {
                $self->{curInfo}->{image} = "http://www.culturalianet.com/" . $1;
            }
        }
        elsif ($tagname eq "font") {
            my $class = $attr->{class};
            $self->{insideName}  = 1 if $class eq "titulo2";
            $self->{insideInfos} = 1 if $class eq "titulo3";
        }
    }

    # end
    # Called for each closing tag; only keeps the nesting counter up to date.
    sub end {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # text
    # Called for each piece of text between tags.
    # Search results: fills director/date or title of the current entry.
    # Movie page: fills $self->{curInfo} from the labelled info lines.
    sub text {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        if ($self->{parsingList}) {
            return unless $self->{isMovie};

            if ($origtext =~ /De ([^\(]*) \(([0-9]{4})\)/) {
                # "De <director> (<year>)" closes the current entry
                my ($director, $year) = ($1, $2);
                $self->{itemsList}[$self->{itemIdx}]->{"director"} = $director;
                $self->{itemsList}[ $self->{itemIdx} ]->{"date"} = $year;
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
            }
            else {
                # Anything else is the title; only the first one is kept
                $origtext =~ s/\.$//;
                $self->{itemsList}[$self->{itemIdx}]->{"title"} ||= $origtext;
            }
            return;
        }

        $origtext =~ s/\s{2,}//g;
        $origtext =~ s/\n//g unless $self->{insideSynopsis};

        my $info = sub { $self->{curInfo} };

        if ($self->{insideName}) {
            # "<title>. (<year>)"
            if ($origtext =~ /([^\(]*)\. \(([0-9]{4})\)/) {
                my ($title, $year) = ($1, $2);
                $info->()->{title} = $title;
                $info->()->{date}  = $year;
            }
            $self->{insideName} = 0;
        }
        elsif ($self->{insideInfos}) {
            $origtext =~ s/ , //;
            $origtext =~ s/(, )*$//;
            if ($origtext =~ /Género\:(.*)/) {
                my $genre = $1;
                $genre =~ s/ \/ /,/g;
                $info->()->{genre} = $genre;
            }
            elsif ($origtext =~ /Nacionalidad\:(.*)/) {
                $info->()->{country} = $1;
            }
            elsif ($origtext =~ /Director\:(.*)/) {
                $info->()->{director} = $1;
            }
            elsif ($origtext =~ /Actores\:(.*)/) {
                $info->()->{actors} = $1;
            }
            elsif ($origtext =~ /Sinopsis\:(.*)/) {
                my $synopsis = $1;
                $synopsis =~ s/, //;
                $info->()->{synopsis} = $synopsis;
            }
            elsif ($origtext =~ /Duración\:(.*)/) {
                my $time = $1;
                $time =~ s/\.$//;
                $info->()->{time} = $time;
            }
            $self->{insideInfos} = 0;
        }
        elsif ($origtext =~ /^Sinopsis\:(.*)/) {
            my $synopsis = $1;
            $synopsis =~ s/, //;
            $synopsis =~ s/(, )*$//;
            $info->()->{synopsis} = $synopsis;
        }

        # Italic text is taken as the original title
        if ($self->{inside}->{i}) {
            $self->{curInfo}->{original} = $origtext;
        }
    }

    # new
    # Constructor. Returns object reference.
    sub new {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        # Fields available in search results
        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 1,
            actors   => 0,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Simplifies the HTML before parsing: drops <b> tags, turns <br> into
    # ", ", unwraps person links and pulls the text following a "titulo3"
    # font inside that font element.
    # Returns the rewritten HTML.
    sub preProcess {
        my ($self, $html) = @_;

        $html =~ s{</?b>}{}g;
        $html =~ s/<br>/, /g;
        $html =~ s{<a href=\.\./art/ver_e\.php\?nombre=[0-9]*>([^<]*)</a>}
                  {$1}g;
        $html =~ s{<font class.=..titulo3.>([^<]*)</font>([^<]*)}
                  {<font class ='titulo3'>$1 $2</font>}g;

        return $html;
    }

    # Returns the search URL for $word.
    sub getSearchUrl {
        my ($self, $word) = @_;

        return "http://www.culturalianet.com/bus/resu.php?texto=$word&donde=1";
    }

    # Returns the absolute URL for a result's relative $url.
    sub getItemUrl {
        my ($self, $url) = @_;

        return "http://www.culturalianet.com/bus/" . $url;
    }

    sub getName {
        return "CulturaliaNet";
    }

    sub getAuthor {
        return 'MeV';
    }

    sub getLang {
        return 'ES';
    }

    sub getCharset {
        my $self = shift;

        return "Windows-1252";
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCDVDEmpire.pm b/lib/gcstar/GCPlugins/GCfilms/GCDVDEmpire.pm new file mode 100644 index 0000000..a32a7a8 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCDVDEmpire.pm @@ -0,0 +1,427 @@
package GCPlugins::GCfilms::GCDVDEmpire;

###################################################
#
# Copyright 2009 by FiXx
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginDVDEmpire;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for each opening tag. Drives a small state machine:
    # - search results: detects the result container, each movie link, its
    #   thumbnail and cast links, and the <endsearch> marker added by
    #   preProcess;
    # - movie page: raises one flag per field for the text callback, using
    #   the pseudo tags added by preProcess.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            return if $self->{outOfMovieList};

            if (   $self->{inMovieList}
                && $self->{inMovie} eq 0
                && $tagname eq 'a'
                && $attr->{href} =~ /^(\/Exec\/v4_item.asp\?item_id=[0-9]*)$/)
            {
                # Link to an item page: open a new result entry
                my $link = $1;
                $self->{isMovie} = 1;
                $self->{inMovie} = 1;
                $self->{itemIdx}++;
                $self->{itemsList}[ $self->{itemIdx} ]->{url} = $link;
            }
            elsif ($self->{inMovie}
                && $tagname eq 'img'
                && $attr->{src} =~ /(.*gen\/movies\/[0-9]*t.jpg)/)
            {
                # Thumbnail: remember the "h" variant of the picture
                my $picture = $attr->{src};
                $picture =~ s/t.jpg$/h.jpg/;
                $self->{itemsList}[ $self->{itemIdx} ]->{image} = $picture;
            }
            elsif ($self->{inMovie}
                && $tagname eq 'a'
                && $attr->{href} =~ /cast_id/)
            {
                $self->{isActors} = 1;
            }
            elsif ($self->{inMovie}
                && $tagname eq 'td'
                && $attr->{bgcolor} eq '#D7DDE7')
            {
                # Separator cell: the current movie is complete
                $self->{inMovie} = 0;
            }
            elsif ($tagname eq 'div' && $attr->{id} eq 'Search_Container')
            {
                $self->{inMovieList} = 1;
            }
            elsif ($self->{inMovieList} && $tagname eq 'endsearch')
            {
                $self->{inMovieList}    = 0;
                $self->{outOfMovieList} = 1;
            }
            return;
        }

        if ($self->{parsingEnded})
        {
            # Once the page content is over, complete the item (one time
            # only) with what was already known from the search results.
            unless ($self->{infoSet})
            {
                for my $field (qw(image date time age))
                {
                    $self->{curInfo}->{$field} =
                        $self->{itemsList}[ $self->{wantedIdx} ]->{$field};
                }
                my $back = $self->{curInfo}->{image};
                $back =~ s/h.jpg/b.jpg/;
                $self->{curInfo}->{backpic} = $back;

                $self->{infoSet} = 1;
            }
            return;
        }

        if ($tagname eq 'div' && $attr->{id} eq 'Search_Container')
        {
            $self->{isContent} = 1;
            return;
        }
        return unless $self->{isContent};

        if ($tagname eq 'div')
        {
            $self->{inNonContentDiv} = 1;
            return;
        }

        # Pseudo tags that simply raise a flag
        my %flag_for = (
            rating    => 'isRating',
            actors    => 'isActors',
            directors => 'isDirectors',
            genres    => 'isGenres',
            audio     => 'inAudio',
        );

        if (my $flag = $flag_for{$tagname})
        {
            $self->{$flag} = 1;
        }
        elsif ($tagname eq 'td')
        {
            if    ($attr->{class} eq 'fontxlarge') { $self->{isTitle}    = 1; }
            elsif ($self->{inAudio})               { $self->{isAudio}    = 1; }
            elsif ($self->{startSynopsis})         { $self->{isSynopsis} = 1; }
        }
        elsif ($tagname eq 'a')
        {
            if ($self->{isActors} && $attr->{href} =~ /v4_list_cast.asp/)
            {
                $self->{isActor} = 1;
            }
            elsif ($self->{isDirectors} && $attr->{href} =~ /v4_list_cast.asp/)
            {
                $self->{isDirector} = 1;
            }
            elsif ($self->{isGenres} && $attr->{href} =~ /v2_category.asp/)
            {
                $self->{isGenre} = 1;
            }
        }
        elsif ($tagname eq 'strong')
        {
            # isTitle == 2 means "the next text is the title"
            $self->{isTitle} = 2 if $self->{isTitle};
        }
        elsif ($tagname eq 'br')
        {
            $self->{synopsisLineBreak} = 1 if $self->{isSynopsis};
        }
    }

    # end
    # Called for each closing tag. Only the movie page needs it: it closes
    # the content area (which ends the parsing) and the per-field sections.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;

        return if $self->{parsingList};
        return if $self->{parsingEnded};
        return unless $self->{isContent};

        if ($tagname eq 'div')
        {
            if ($self->{inNonContentDiv})
            {
                $self->{inNonContentDiv} = 0;
            }
            else
            {
                # The content container itself was closed
                $self->{isContent}    = 0;
                $self->{parsingEnded} = 1;
            }
        }
        elsif ($tagname eq 'table')
        {
            if ($self->{isSynopsis})
            {
                $self->{startSynopsis} = 0;
                $self->{SynopsisEnded} = 1;
                $self->{isSynopsis}    = 0;
            }
        }
        elsif ($tagname eq 'actors')
        {
            $self->{isActors} = 0 if $self->{isActors};
        }
        elsif ($tagname eq 'genres')
        {
            $self->{isGenres} = 0 if $self->{isGenres};
        }
        elsif ($tagname eq 'directors')
        {
            $self->{isDirectors} = 0 if $self->{isDirectors};
        }
        elsif ($tagname eq 'td')
        {
            if ($self->{isAudio})
            {
                $self->{isAudio} = 0;
                $self->{inAudio} = 0;
            }
        }
    }

    # text
    # Called for each piece of text between tags; stores it in the field
    # selected by the flags raised in start.
    sub text
    {
        my ($self, $origtext) = @_;

        if ($self->{parsingList})
        {
            if ($self->{inMovieList} && $self->{isMovie})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                $self->{isMovie} = 0;
            }
            elsif ($self->{inMovie}
                && $origtext =~ /([^~]*)~~~([0-9]*)mins.~~~Release Date:[^~]*~~~Prod Year: ([0-9]{4})/)
            {
                # "<rating>~~~<n>mins.~~~Release Date:...~~~Prod Year: <yyyy>"
                # (the ~~~ separators come from preProcess)
                my ($rating, $minutes, $year) = ($1, $2, $3);

                my %age_for = (
                    'Unrated'  => 1,
                    'Open'     => 1,
                    'G'        => 2,
                    'Approved' => 2,
                    'PG'       => 5,
                    'M'        => 5,
                    'GP'       => 5,
                    'PG-13'    => 13,
                    'R'        => 17,
                    'NC-17'    => 18,
                    'X'        => 18,
                );
                $self->{itemsList}[ $self->{itemIdx} ]->{age} = $age_for{$rating}
                    if exists $age_for{$rating};

                $self->{itemsList}[ $self->{itemIdx} ]->{time} = $minutes . ' min';

                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $year;
            }
            elsif ($self->{inMovie} && $self->{isActors})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{actors} .= $origtext . ', ';
                $self->{isActors} = 0;
            }
            return;
        }

        $origtext =~ s/^\s*//;

        return if !$origtext;
        return if $self->{parsingEnded};
        return unless $self->{isContent};

        if ($self->{isTitle} eq 2)
        {
            $self->{curInfo}->{title}    = $origtext;    #"short text"
            $self->{curInfo}->{original} = $origtext;    #"short text"
            $self->{isTitle} = 0;
        }
        elsif ($self->{isRating})
        {
            # Site rating is out of 5; stored doubled
            $self->{curInfo}->{ratingpress} = int($origtext * 2);    #"number"
            $self->{isRating} = 0;
        }
        elsif (!$self->{SynopsisEnded} && $origtext eq 'Synopsis')
        {
            $self->{startSynopsis} = 1;
        }
        elsif ($self->{isSynopsis})
        {
            my $broken = $self->{synopsisLineBreak};
            $self->{curInfo}->{synopsis} .= "\n\n" if $broken;
            $self->{curInfo}->{synopsis} .= $origtext;    #"long text"
            $self->{curInfo}->{synopsis} .= " " if $broken;
            $self->{synopsisLineBreak} = 0;
        }
        elsif ($self->{isActor})
        {
            push @{$self->{curInfo}->{actors}}, [$origtext]
                if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
            $self->{actorsCounter}++;
            $self->{isActor} = 0;
        }
        elsif ($self->{isGenre})
        {
            push @{$self->{curInfo}->{genre}}, [$origtext];
            $self->{isGenre} = 0;
        }
        elsif ($self->{isDirector})
        {
            $self->{curInfo}->{director} .= $origtext;    #"long text"
            $self->{isDirector}  = 0;
            $self->{isDirectors} = 0;
        }
        elsif ($self->{isAudio})
        {
            # "<language>:<encoding>"
            my $language = $origtext;
            $language =~ s/([^:]*):(.*)/$1/;
            my $audio = $2;
            $language =~ s/\s//;
            $audio    =~ s/\r//;
            push @{$self->{curInfo}->{audio}}, [$language, $audio];
        }
    }

    # new
    # Constructor. Returns object reference.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        # Fields available in search results
        $self->{hasField} = {
            title  => 1,
            date   => 1,
            actors => 1,
            age    => 1,
            time   => 1,
            image  => 1,
        };

        $self->{$_} = 0 for qw(isInfo isMovie inMovie isContent);
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Resets the end-of-parsing flag and rewrites the HTML so that the
    # interesting parts are delimited by pseudo tags (<endsearch>, <rating>,
    # <actors>, <directors>, <genres>, <audio>) or "~~~" separators.
    # Returns the rewritten HTML.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;

        if ($self->{parsingList})
        {
            $html =~ s/<\/nobr>[ ]*~[ ]*<nobr>/~~~/g;
            $html =~ s/<b>Phone #:<\/b>/<endsearch>here<\/endsearch>/g;
        }
        else
        {
            $html =~ s/<b>([0-9\.]*)<\/b> out of <b>5<\/b>/<rating>$1<\/rating>/g;
            $html =~ s/<b>Actors:<\/b>/<actors>/g;
            $html =~ s/<b>Writers:<\/b>/<\/actors>/g;
            $html =~ s/<b>Directors:<\/b>(.*cast_id[^\/]*<\/a>)/<directors>$1<\/directors>/g;
            $html =~ s/<b>Genre<\/b>(.*cat_id[^\/]*<\/a>)/<genres>$1<\/genres>/g;
            $html =~ s/<b>Audio:<\/b>/<audio><\/audio>/g;
            $html =~ s/<font face='[^']*' size='[^']*' color='#FFFFFF'>i<\/font>/ /g;
        }

        return $html;
    }

    # Returns the search URL for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        # Strict matching is hard-wired off; 64 would be the refined value
        # used when it is on.
        my $strictmatching = 0;
        my $searchvalue    = $strictmatching ? 64 : 32;

        return "http://www.dvdempire.com/Exec/v1_search_all.asp?string=$word&pp=5&search_refined=$searchvalue";
    }

    # Returns the absolute URL for a result's relative $url.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return 'http://www.dvdempire.com/' . $url;
    }

    # Returns $url unchanged.
    sub changeUrl
    {
        my ($self, $url) = @_;

        return $url;
    }

    sub getName
    {
        return "DVDEmpire (EN)";
    }

    sub getCharset
    {
        my $self = shift;

        return "ISO-8859-1";
    }

    sub getAuthor
    {
        return 'FiXx';
    }

    sub getLang
    {
        return 'EN';
    }
}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCDVDFr.pm b/lib/gcstar/GCPlugins/GCfilms/GCDVDFr.pm new file mode 100644 index 0000000..d75c2f6 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCDVDFr.pm @@ -0,0 +1,374 @@
package GCPlugins::GCfilms::GCDVDFr;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

###################################
#                                 #
#     Plugin submitted by MeV     #
#                                 #
###################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    package GCPlugins::GCfilms::GCPluginDVDFr;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for each opening tag of the response.
    # Search results: <dvd> opens a new entry, the other tags flag which
    # field the next text belongs to.
    # Item page: each known tag raises the matching "inside..." flag;
    # <rating id=...> is mapped directly to an age value.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq 'dvd')
            {
                my $url = $attr->{href};
                $self->{isMovie} = 1;
                $self->{isInfo} = 1;
                $self->{itemIdx}++;
                $self->{itemsList}[$self->{itemIdx}]->{url} = $url;
            }
            elsif ($tagname eq 'id')
            {
                $self->{isID} = 1;
            }
            elsif ($tagname eq 'fr')
            {
                $self->{isTitleFR} = 1;
            }
            elsif (($tagname eq 'star') && ($attr->{type} =~ /R.alisateur/))
            {
                $self->{isDirector} = 1;
            }
            elsif ($tagname eq "media")
            {
                $self->{isMedia} = 1;
            }
            elsif ($tagname eq "edition")
            {
                $self->{isEdition} = 1;
            }
        }
        else
        {
            if (($tagname eq "cover") || ($tagname eq "jaquette"))
            {
                $self->{insideImage} = 1;
            }
            elsif ($tagname eq "url")
            {
                $self->{insideURL} = 1;
            }
            elsif (($tagname eq "fr") || ($tagname eq "titres_fr"))
            {
                $self->{insideTitleFR} = 1;
            }
            elsif (($tagname eq "vo") || ($tagname eq "titres_vo"))
            {
                $self->{insideTitleVO} = 1;
            }
            elsif ($tagname eq "pays")
            {
                $self->{insideNat} = 1;
            }
            elsif ($tagname eq "annee")
            {
                $self->{insideYear} = 1;
            }
            elsif ($tagname eq "synopsis")
            {
                $self->{insideSynopsis} = 1;
            }
            elsif ($tagname eq "duree")
            {
                $self->{insideTime} = 1;
            }
            elsif ($tagname eq "realisateur")
            {
                $self->{insideDirector} = 1;
            }
            elsif ($tagname eq "star")
            {
                # A <star> without type is treated as an actor
                $self->{insideDirector} = 1 if $attr->{type} eq "Réalisateur";
                $self->{insideActors} = 1
                    if (! $attr->{type}) || ($attr->{type} eq "Acteur");
            }
            elsif ($tagname eq "categorie")
            {
                $self->{insideGenre} = 1;
            }
            elsif ($tagname eq "rating")
            {
                $self->{curInfo}->{age} = 2 if $attr->{id} == 1;
                $self->{curInfo}->{age} = 5 if $attr->{id} == 2;
                $self->{curInfo}->{age} = 12 if $attr->{id} == 3;
                $self->{curInfo}->{age} = 13 if $attr->{id} == 4;
                $self->{curInfo}->{age} = 16 if $attr->{id} == 5;
                $self->{curInfo}->{age} = 18 if $attr->{id} > 5;
            }
        }
    }

    # end
    # Called for each closing tag; only keeps the nesting counter up to date.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # text
    # Called for each piece of text between tags; stores it in the field
    # selected by the flags raised in start.
    # Audio and subtitle languages are accumulated as comma separated lists
    # without duplicates.
    sub text
    {
        my ($self, $origtext) = @_;

        # One-character texts are ignored, except for an <id> value
        return if (length($origtext) < 2) && (! $self->{isID});

        if ($self->{parsingList})
        {
            if ($self->{isID})
            {
                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.dvdfr.com/api/dvd.php?id=$origtext";
                $self->{isID} = 0;
            }
            elsif ($self->{isDirector})
            {
                $self->{itemsList}[$self->{itemIdx}]->{"director"} .= $self->{itemsList}[$self->{itemIdx}]->{"director"} ? ", " . $origtext : $origtext
                    if ($self->{directorCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_DIRECTORS);
                $self->{directorCounter}++;
                $self->{isDirector} = 0;
            }
            elsif ($self->{isMovie})
            {
                $self->{itemsList}[$self->{itemIdx}]->{"title"} = $origtext;
                $self->{isMovie} = 0;
                $self->{directorCounter} = 0;
                $self->{isInfo} = 1;
                return;
            }
            elsif ($self->{isTitleFR})
            {
                $self->{itemsList}[$self->{itemIdx}]->{"title"} = $origtext;
                $self->{isTitleFR} = 0;
            }
            elsif ($self->{isMedia})
            {
                $origtext = '' if $origtext !~ /\w/;
                $self->{itemsList}[$self->{itemIdx}]->{"format"} = $origtext;
                $self->{isMedia} = 0;
            }
            elsif ($self->{isEdition})
            {
                $origtext = '' if $origtext !~ /\w/;
                $self->{itemsList}[$self->{itemIdx}]->{"extra"} = $origtext;
                $self->{isEdition} = 0;
            }
        }
        else
        {
            $origtext =~ s/\s{2,}//g;

            if ($self->{insideImage})
            {
                # The cover script URL is turned into the direct image URL;
                # images are stored in directories of 1000 ids.
                if ($origtext =~ m|/microapp/jaquette.php\?id=([0-9]*)|)
                {
                    my $dir = int($1 / 1000);
                    $self->{curInfo}->{image} = "http://dvdfr.com/images/dvd/cover_200x280/$dir/$1.jpg";
                }
                else
                {
                    $self->{curInfo}->{image} = $origtext;
                }
                $self->{insideImage} = 0;
            }
            elsif ($self->{insideURL})
            {
                $self->{curInfo}->{$self->{urlField}} = $origtext;
                $self->{insideURL} = 0;
            }
            elsif ($self->{insideTitleFR})
            {
                $self->{curInfo}->{title} = $origtext;
                $self->{insideTitleFR} = 0;
            }
            elsif ($self->{insideTitleVO})
            {
                $self->{curInfo}->{original} = $origtext;
                $self->{insideTitleVO} = 0;
            }
            elsif ($self->{insideNat})
            {
                $self->{curInfo}->{country} .= $self->{curInfo}->{country} ? ", " . $origtext : $origtext;
                $self->{insideNat} = 0;
            }
            elsif ($self->{insideYear})
            {
                $self->{curInfo}->{date} = $origtext;
                $self->{insideYear} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                $self->{curInfo}->{synopsis} = $origtext;
                $self->{curInfo}->{synopsis} =~ s/\n/ /g;
                $self->{insideSynopsis} = 0;
            }
            elsif ($self->{insideTime})
            {
                $self->{curInfo}->{time} = $origtext;
                $self->{insideTime} = 0;
            }
            elsif ($self->{insideDirector})
            {
                $self->{curInfo}->{director} .= $self->{curInfo}->{director} ? ", " . $origtext : $origtext
                    if ($self->{directorCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_DIRECTORS);
                $self->{directorCounter}++;
                $self->{insideDirector} = 0;
            }
            elsif ($self->{insideActors})
            {
                $self->{curInfo}->{actors} .= $self->{curInfo}->{actors} ? ", " . $origtext : $origtext
                    if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
                $self->{actorsCounter}++;
                $self->{insideActors} = 0;
            }
            elsif ($self->{insideGenre})
            {
                $self->{curInfo}->{genre} .= $self->{curInfo}->{genre} ? "," . $origtext : $origtext;
                $self->{insideGenre} = 0;
            }
            elsif (($self->{inside}->{track}) && ($self->{inside}->{langue}))
            {
                # \Q...\E: the label comes from the remote page and must be
                # matched literally; unquoted, a value with regex
                # metacharacters aborts the parse or matches wrongly.
                if ($self->{curInfo}->{audio} !~ /(^|,)\Q$origtext\E(,|$)/)
                {
                    $self->{curInfo}->{audio} .= ',' if $self->{curInfo}->{audio};
                    $self->{curInfo}->{audio} .= $origtext;
                }
            }
            elsif (($self->{inside}->{soustitrage}) && ($self->{inside}->{soustitre}))
            {
                # Same literal matching as for audio languages
                if ($self->{curInfo}->{subt} !~ /(^|,)\Q$origtext\E(,|$)/)
                {
                    $self->{curInfo}->{subt} .= ',' if $self->{curInfo}->{subt};
                    $self->{curInfo}->{subt} .= $origtext;
                }
            }
        }
    }

    # new
    # Constructor. Returns object reference.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = $class->SUPER::new();
        bless ($self, $class);

        # Fields available in search results
        $self->{hasField} = {
            title => 1,
            date => 0,
            director => 1,
            actors => 0,
            format => 1,
        };

        $self->{isInfo} = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl} = undef;

        return $self;
    }

    # preProcess
    # Resets the director/actor counters; the content itself is returned
    # unchanged.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{directorCounter} = 0;
        $self->{actorsCounter} = 0;

        return $html;
    }

    # Returns the search URL for $word. A word made only of digits/X and at
    # least 8 characters long is sent as an "ean:" query.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        $word = 'ean:'.$word
            if $word =~ /^[\dX]{8}[\dX]*$/;

        return "http://www.dvdfr.com/api/search.php?title=$word";
    }

    # Returns $url as is, or the site root when it is empty.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return $url unless $url eq '';
        return "http://www.dvdfr.com/";
    }

    # Rewrites a "/dvd/" page URL into its "/api/" counterpart.
    sub changeUrl
    {
        my ($self, $url) = @_;

        $url =~ s/\/dvd\//\/api\//;

        return $url;
    }

    sub getName
    {
        return "DVDFr.com";
    }

    sub getAuthor
    {
        return 'MeV';
    }

    sub getLang
    {
        return 'FR';
    }

    sub getExtra
    {
        return 'Edition';
    }

    sub getEanField
    {
        return 'title';
    }

    sub isPreferred
    {
        return 1;
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCDVDPost.pm b/lib/gcstar/GCPlugins/GCfilms/GCDVDPost.pm new file mode 100644 index 0000000..4bb6456 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCDVDPost.pm @@ -0,0 +1,269 @@
package GCPlugins::GCfilms::GCDVDPost;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

###################################
#                                 #
#     Plugin submitted by MeV     #
#                                 #
###################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginDVDPost;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Normalises a comma separated list of names taken from the page:
    # " ," becomes ", " and one trailing comma (with the whitespace
    # around it) is dropped.  Separators between names are left alone.
    sub _tidyNameList
    {
        my ($names) = @_;

        $names =~ s/ ,/, /g;
        $names =~ s/\s*,\s*$//;

        return $names;
    }

    # Opening-tag handler.
    #   $tagname - tag name
    #   $attr    - hash ref of the tag attributes
    # While parsing a result list, every link to product_info.php starts a
    # new entry in itemsList.  On a detail page it records the cover, the
    # age rating picture and arms the flags read by text().
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                if (($attr->{href} =~ /^product_info\.php\?products_id=/))
                {
                    my $url = $attr->{href};
                    $self->{isMovie} = 1;
                    $self->{isInfo} = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
            }
        }
        else
        {
            if ($tagname eq "img")
            {
                if ($attr->{src} =~ /http:\/\/images\.dvdpost\.be\/\/dvd/)
                {
                    $self->{curInfo}->{image} = $attr->{src};
                }
                elsif ($self->{insideAge})
                {
                    # The rating is only available as a picture; map the
                    # file name of that picture to an age.
                    (my $fileName = $attr->{src}) =~ s|.+/([^/]+)$|$1|;
                    $self->{curInfo}->{age} = 2 if $fileName eq 'all.gif';
                    $self->{curInfo}->{age} = 12 if $fileName eq '-12.gif';
                    $self->{curInfo}->{age} = 16 if $fileName eq '-16.gif';
                    $self->{insideAge} = 0;
                }
            }
            elsif ($tagname eq "table")
            {
                if (   ($attr->{cellpadding} eq "0")
                    && ($attr->{cellspacing} eq "0")
                    && ($attr->{width} eq "100%")
                    && ($attr->{border} ne "0"))
                {
                    $self->{insideSynopsisFather} = 1;
                }
            }
            elsif ($tagname eq "td")
            {
                if ($attr->{style} eq "text-align:right;font-size:9px;color:gray")
                {
                    $self->{insideGenre} = 1;
                }
                elsif (($attr->{class} eq "boxText") && $attr->{align} eq "left")
                {
                    # Only the first matching cell after the table flagged
                    # above is treated as the synopsis.
                    if ($self->{insideSynopsisFather} == 1)
                    {
                        $self->{insideSynopsis} = 1;
                        $self->{insideSynopsisFather} = 0;
                    }
                    else
                    {
                        $self->{insideSynopsis} = 0;
                    }
                }
            }
        }
    }

    # Closing-tag handler: only keeps the nesting counters up to date.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;

    }

    # Text handler.  Chunks shorter than two characters are ignored.
    # In list mode the first chunk after a product link is the title.
    # In detail mode the chunk is stored in the field whose flag is armed;
    # otherwise bold labels arm the flag for the chunk that follows.
    sub text
    {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext;
                $self->{isMovie} = 0;
                $self->{isInfo} = 1;
                return;
            }
        }
        else
        {
            $origtext =~ s/\n*//g if !$self->{insideSynopsis};
            $origtext =~ s/\s{2,}//g;

            # NOTE(review): nothing in this package arms insideDate,
            # insideNat or insideOrig; the branches are kept as they were.
            if ($self->{insideDate})
            {
                $self->{curInfo}->{date} = $origtext;
                $self->{insideDate} = 0;
            }
            elsif ($self->{insideDirector})
            {
                # The previous pattern, s/^(.*), /$1/, removed the last
                # ", " wherever it stood and glued two names together
                # when the list did not end with a comma.
                $origtext = _tidyNameList($origtext);
                $self->{curInfo}->{director} = $origtext if !$self->{curInfo}->{director};
                $self->{insideDirector} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                $self->{curInfo}->{synopsis} = $origtext;
                $self->{insideSynopsis} = 0;
            }
            elsif ($self->{insideNat})
            {
                $self->{curInfo}->{country} = $origtext;
                $self->{insideNat} = 0;
            }
            elsif ($self->{insideTime})
            {
                $self->{curInfo}->{time} = $origtext . " min";
                $self->{insideTime} = 0;
            }
            elsif ($self->{insideActors})
            {
                $origtext = _tidyNameList($origtext);
                $self->{curInfo}->{actors} = $origtext if !$self->{curInfo}->{actors};
                $self->{insideActors} = 0;
            }
            elsif ($self->{insideOrig})
            {
                $self->{curInfo}->{original} = $origtext if !$self->{curInfo}->{original};
                $self->{insideOrig} = 0;
            }
            elsif ($self->{inside}->{b})
            {
                # Bold labels announce the value carried by the next chunk.
                $self->{insideDirector} = 1 if $origtext =~ m/R.alisateur/;
                $self->{insideTime} = 1 if $origtext =~ m/Dur.e/;
                $self->{insideActors} = 1 if $origtext =~ m/Acteurs/;
                $self->{insideAge} = 1 if $origtext =~ m/Public/;
            }
            elsif ($self->{inside}->{table})
            {
                # "Title ( 1999 )" gives both the title and the year.
                if ($origtext =~ /(.*) \( ([0-9]{4}) \)/)
                {
                    $self->{curInfo}->{title} = $1 if !$self->{curInfo}->{title};
                    $self->{curInfo}->{date} = $2 if !$self->{curInfo}->{date};
                }
                elsif ($self->{insideGenre})
                {
                    $origtext =~ s/\|/,/g;
                    $self->{curInfo}->{genre} = $origtext;
                    $self->{insideGenre} = 0;
                }
            }
        }
    }

    # Constructor: builds the base parser and declares which fields the
    # result list provides (only the title here).
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title => 1,
            date => 0,
            director => 0,
            actors => 0,
        };

        $self->{isInfo} = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl} = undef;

        return $self;
    }

    # Cleans the raw HTML before it is parsed: non-breaking spaces become
    # plain spaces, <u> tags are dropped and the links around director and
    # actor names are unwrapped so each list arrives as one text chunk.
    sub preProcess
    {
        my ($self, $html) = @_;

        # Handles both the entity and the raw U+00A0 character.  In the
        # copy this was taken from, the pattern was a bare whitespace
        # character, which made the substitution a no-op.
        $html =~ s/(?:&nbsp;|\x{A0})/ /g;
        $html =~ s/<u>|<\/u>//g;
        $html =~ s/<a href="directors\.php\?directors\_id=[0-9]*">([^<]*)<\/a>/$1/gi;
        $html =~ s/<a href="actors\.php\?actors\_id=[0-9]*">([^<]*)<\/a>/$1/gi;

        return $html;
    }

    # URL of the search page for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.dvdpost.be/advanced_search_result2.php?language=fr&keywords=$word";
    }

    # Absolute URL of an item; the site root when $url is empty.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return "http://www.dvdpost.be/" . $url . "&language=fr" unless $url eq '';
        return "http://www.dvdpost.be/";
    }

    sub getName
    {
        return "DVDPost.be";
    }

    sub getAuthor
    {
        return 'MeV';
    }

    sub getLang
    {
        return 'FR';
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCDicshop.pm b/lib/gcstar/GCPlugins/GCfilms/GCDicshop.pm new file mode 100644 index 0000000..d50ea4d --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCDicshop.pm @@ -0,0 +1,343 @@
package GCPlugins::GCfilms::GCDicschop;

###################################################
#
# Copyright 2005-2010 Tian, Michael Mayer
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginDicshop;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Opening-tag handler.
    #   $tagname - tag name
    #   $attr    - hash ref of the tag attributes
    # In list mode the div classes delimit each result and the first link
    # inside a result gives its URL.  In detail mode the div classes arm
    # the flags read by text(), and <img> tags give the cover and rating.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            return if $self->{parsingEnded};
            if ($tagname eq 'div')
            {
                if ($attr->{class} eq 'ds_l_h')
                {
                    $self->{isMovie} = 1;
                }
                elsif ($attr->{class} eq 'ds_l_b')
                {
                    $self->{isMovie} = 0;
                }
                elsif ($attr->{class} eq 'settingSavePlusContainer')
                {
                    $self->{parsingEnded} = 1;
                }
            }
            elsif ($self->{isMovie} && ($tagname eq 'a'))
            {
                $self->{itemIdx}++;
                $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href};
            }
        }
        else
        {
            return if $self->{parsingEnded};

            if ($tagname eq 'div')
            {
                if ($attr->{class} eq "header_section hs_spec")
                {
                    $self->{isInfo} = 1;
                }
                elsif ($attr->{class} eq "header_section hs_omdomme")
                {
                    $self->{isSynopsis} = 0;
                }
                elsif ($attr->{class} =~ m/right_cont_section/)
                {
                    $self->{parsingEnded} = 1;
                }
                elsif (($attr->{class} =~ m/^item([12])$/) && $self->{isInfo})
                {
                    # item1 holds a label, item2 the matching value.
                    $self->{isItem} = $1;
                }
                elsif ($attr->{class} =~ m/ds_produkt_left/)
                {
                    $self->{isCover} = 1;
                }
                elsif ($attr->{class} =~ m/ds_omdomme_top/)
                {
                    $self->{isRating} = 1;
                }
                elsif ($attr->{class} =~ m/ds_omdomme_cust/)
                {
                    $self->{isRating} = 0;
                }
            }
            elsif ($tagname eq 'img')
            {
                if ($self->{isCover} && (!$self->{curInfo}->{image}))
                {
                    $self->{curInfo}->{image} = $attr->{src};

                    if ($self->{bigPics})
                    {
                        $self->{curInfo}->{image} =~ s|front_normal|front_large|;
                        $self->{curInfo}->{backpic} = $self->{curInfo}->{image};
                        $self->{curInfo}->{backpic} =~ s|front_large|back_large|;
                    }
                }
                elsif ($self->{isRating})
                {
                    # A full star counts 2, a half star 1.
                    $self->{curInfo}->{ratingpress} += 2
                      if ($attr->{src} =~ m/rate_big_1\.gif/);
                    $self->{curInfo}->{ratingpress} += 1
                      if ($attr->{src} =~ m/rate_big_05\.gif/);
                }
            }
            elsif ($tagname eq 'script')
            {
                $self->{isSynopsis} = 0;
            }
            elsif ($tagname eq 'br')
            {
                $self->{curInfo}->{synopsis} .= "\n"
                  if ($self->{isSynopsis} && $self->{curInfo}->{synopsis});
            }
        }
    }

    # Closing-tag handler: leaves the cover area at </div> and ends a
    # synopsis paragraph with a newline at </p>.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;

        if ($tagname eq 'div')
        {
            $self->{isCover} = 0;
        }
        elsif ($tagname eq 'p')
        {
            $self->{curInfo}->{synopsis} .= "\n"
              if ($self->{isSynopsis} && $self->{curInfo}->{synopsis});
        }
    }

    # Text handler.  The chunk is trimmed and ignored when empty.
    # List mode: bold text is the title; the following div text gives the
    # year and, after "med", the actors.
    # Detail mode: text of an item1 div is remembered as the current label
    # and text of an item2 div is stored according to that label.
    sub text
    {
        my ($self, $origtext) = @_;

        $origtext =~ s/^\s*//;
        $origtext =~ s/\s*$//;
        return if !$origtext;

        if ($self->{parsingList})
        {
            # evaluate the search result page
            if ($self->{isMovie})
            {
                if ($self->{inside}->{b})
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                }
                elsif ($self->{inside}->{div})
                {
                    if ($origtext =~ /^.*?(\d{4}) +(med +([^-.]*))?/)
                    {
                        $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
                        $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $3;
                    }
                    elsif ($origtext =~ /med +([^-.]*)/)
                    {
                        $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $1
                          if $1;
                    }
                    $self->{itemsList}[ $self->{itemIdx} ]->{actors} =~ s/ och/,/g;
                    $self->{isMovie} = 0;
                }
            }
        }
        else
        {
            return if $self->{parsingEnded};
            # evaluate the film details page
            if ($self->{inside}->{h3})
            {
                if ($origtext eq "Filmens handling")
                {
                    $self->{isSynopsis} = 1;
                }
            }
            elsif ($self->{isSynopsis}) # important: elsif, not only if!
            {
                $self->{curInfo}->{synopsis} .= $origtext;
            }
            elsif ($self->{isItem} == 1)
            {
                $self->{key} = $origtext;
            }
            elsif ($self->{isItem} == 2)
            {
                if (   ($self->{key} eq "Grupp:")
                    or ($self->{key} eq "Genre:")
                    or ($self->{key} eq "Underkategori:"))
                {
                    $origtext =~ s| *film$||i; # remove the trailing "film"
                    $origtext =~ tr{/}{,};     # every "/" separates two genres

                    # Plain substring test: the text is data, not a pattern,
                    # and an empty string must not be appended.
                    my $known = defined $self->{curInfo}->{genre}
                      ? $self->{curInfo}->{genre}
                      : '';
                    $self->{curInfo}->{genre} .= $origtext . ","
                      if ($origtext ne '') && (index($known, $origtext) < 0);
                }
                elsif ($self->{key} eq "Speltid:")
                {
                    $self->{curInfo}->{time} = $origtext;
                }
                elsif ($self->{key} eq "Svensk titel:")
                {
                    $self->{curInfo}->{title} = $origtext;
                }
                elsif ($self->{key} eq "Originaltitel:")
                {
                    $self->{curInfo}->{original} = $origtext;
                }
                elsif ($self->{key} eq "Produktionsland:")
                {
                    if ($self->{curInfo}->{country}) {
                        $self->{curInfo}->{country} .= ", ";
                    }
                    $self->{curInfo}->{country} .= $origtext;
                }
                elsif ($self->{key} =~ m/Premi.*r:/)
                {
                    $self->{curInfo}->{date} = $origtext;
                }
                elsif ($self->{key} eq "Regi:")
                {
                    if ($self->{curInfo}->{director})
                    {
                        $self->{curInfo}->{director} .= ", ";
                    }
                    # Append: a plain assignment here discarded the
                    # directors collected so far and the separator.
                    $self->{curInfo}->{director} .= $origtext;
                }
                elsif ($self->{key} =~ m/despelare:$/)
                {
                    push @{$self->{curInfo}->{actors}}, [$origtext]
                      if ($self->{actorsCounter} <
                        $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
                    $self->{actorsCounter}++;
                }
                elsif ($self->{key} =~ m/ldersgr.*ns:/)
                {
                    # Only store an age when the text starts with a number.
                    if ($origtext =~ m/^(\d+) /)
                    {
                        $self->{curInfo}->{"age"} = $1;
                    }
                }
            }
        }
    }

    # Constructor: builds the base parser and declares which fields the
    # result list provides.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title => 1,
            date => 1,
            director => 0,
            actors => 1,
            age => 1,
        };

        $self->{isInfo} = 0;
        $self->{isRating} = 0;
        $self->{isCover} = 0;

        return $self;
    }

    # Resets the per-page parser state and returns the HTML unchanged.
    # The actor counter is reset here as well, otherwise the MAX_ACTORS
    # cap keeps counting across items fetched with the same object.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;
        $self->{isTitle} = 0;
        $self->{isSynopsis} = 0;
        $self->{isInfo} = 0;
        $self->{isItem} = 0;
        $self->{isCover} = 0;
        $self->{isRating} = 0;
        $self->{key} = '';
        $self->{actorsCounter} = 0;

        return $html;
    }

    # URL of the search page for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.discshop.se/shop/search_solr.php?lang=&cont=ds&"
          . "soktext=$word&subsite_set=movies&lang=se&subsite=bluray&&ref=";
    }

    # Absolute URL of an item from the relative link found in the list.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return 'http://www.discshop.se/shop/' . $url;
    }

    sub changeUrl
    {
        my ($self, $url) = @_;

        return $url;
    }

    sub getName
    {
        return "Discshop.se";
    }

    sub getCharset
    {
        my $self = shift;

        return "Windows-1252";
    }

    sub getAuthor
    {
        return 'Tian';
    }

    sub getLang
    {
        return 'SV';
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCDoubanfilm.pm b/lib/gcstar/GCPlugins/GCfilms/GCDoubanfilm.pm new file mode 100644 index 0000000..e3e8563 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCDoubanfilm.pm @@ -0,0 +1,255 @@
package GCPlugins::GCfilms::GCDoubanfilm;

###################################################
#
# Copyright 2005-2010 Bai Wensimi
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    package GCPlugins::GCfilms::GCPluginDoubanfilm;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
    use XML::Simple;
    use Encode;
    use LWP::Simple qw($ua);

    # Returns the non-empty <name> values of the <author> elements of
    # $node (a hash produced by XMLin with 'author' in ForceArray).
    sub _authorNames
    {
        my ($node) = @_;

        return grep { defined($_) && ($_ ne '') }
          map { $_->{'name'} } @{ $node->{'author'} || [] };
    }

    # Appends one result (an entry hash produced by XMLin) to itemsList.
    sub _addListItem
    {
        my ($self, $entry) = @_;

        $self->{itemIdx}++;
        my $idx = $self->{itemIdx};
        $self->{itemsList}[$idx]->{'url'} = $entry->{'id'};
        $self->{itemsList}[$idx]->{'title'} = $entry->{'title'};

        my @directors = _authorNames($entry);
        $self->{itemsList}[$idx]->{'director'} = join(',', @directors)
          if @directors;

        foreach my $attribute (@{ $entry->{'db:attribute'} || [] })
        {
            my $name = $attribute->{'name'};
            if ($name eq 'country')
            {
                $self->{itemsList}[$idx]->{'country'} = $attribute->{'content'};
            }
            elsif ($name eq 'pubdate')
            {
                $self->{itemsList}[$idx]->{'date'} = $attribute->{'content'};
            }
        }
    }

    # Parses an XML answer of the Douban API.
    #   $page - the document as a string
    # In list mode every entry becomes an itemsList element; otherwise the
    # document fills curInfo.  Returns early, without touching anything,
    # when the page starts like one of the plain text error answers.
    sub parse
    {
        my ($self, $page) = @_;

        # Was "(A) & (B)": both patterns are anchored at the start with
        # different words, so the combined test could never be true.
        return if ($page =~ /^bad imdb/) || ($page =~ /^The/);

        my $xs = XML::Simple->new;

        if ($self->{parsingList})
        {
            # Without ForceArray a lone <entry> or <db:attribute> comes
            # back as a hash and cannot be walked as a list.
            my $xml = $xs->XMLin(
                $page,
                ForceArray => ['author', 'entry', 'db:attribute'],
                KeyAttr => ['']
            );

            if ($page =~ /feed>$/)
            {
                # A search answers with a feed of entries.
                foreach my $entry (@{ $xml->{'entry'} || [] })
                {
                    $self->_addListItem($entry);
                }
            }
            else
            {
                # Any other document is treated as one single entry.
                $self->_addListItem($xml);
            }
        }
        else
        {
            my $xml = $xs->XMLin(
                $page,
                ForceArray => ['author', 'db:attribute'],
                KeyAttr => {'db:tag'=>'name','link'=>'rel'}
            );

            # The separator used to be decided from the list entry instead
            # of curInfo, so the names were stored without any separator.
            foreach my $director (_authorNames($xml))
            {
                $self->{curInfo}->{director} .= ','
                  if defined($self->{curInfo}->{director})
                  && ($self->{curInfo}->{director} ne '');
                $self->{curInfo}->{director} .= $director;
            }

            $self->{curInfo}->{title} = $xml->{'title'};
            $self->{curInfo}->{original} = $xml->{'title'};
            $self->{curInfo}->{webPage} = $xml->{'link'}->{'alternate'}->{'href'};
            $self->{curInfo}->{synopsis} = $xml->{'summary'};

            foreach my $attribute (@{ $xml->{'db:attribute'} || [] })
            {
                my $name = $attribute->{'name'};
                if ($name eq 'country')
                {
                    $self->{curInfo}->{country} = $attribute->{'content'};
                }
                elsif ($name eq 'pubdate')
                {
                    $self->{curInfo}->{date} = $attribute->{'content'};
                }
                elsif ($name eq 'cast')
                {
                    $self->{curInfo}->{actors} .= ','
                      if $self->{curInfo}->{actors} ne '';
                    $self->{curInfo}->{actors} .= $attribute->{'content'};
                }
            }

            # Ask for the large picture instead of the small one.
            my $tmp_image = $xml->{'link'}->{'image'}->{'href'};
            $tmp_image =~ s/spic/lpic/;
            $self->{curInfo}->{image} = $tmp_image;
        }
    }

    # Constructor: builds the base parser and declares which fields the
    # result list provides.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            title => 1,
            director => 1,
            date => 1,
            country => 1,
        };

        return $self;
    }

    # The answer is XML and needs no clean-up.
    sub preProcess
    {
        my ($self, $html) = @_;

        return $html;
    }

    # URL of the API query for $word: an IMDb id lookup when the search
    # field is 'imdb', a keyword search otherwise.
    sub getSearchUrl
    {
        my ($self, $word) = @_;
        if ($self->{searchField} eq 'imdb')
        {
            return "http://api.douban.com/movie/subject/imdb/" .$word;
        }
        else
        {
            return "http://api.douban.com/movie/subjects?q=" .$word;
        }

    }

    sub getItemUrl
    {
        my ($self, $url) = @_;
        return $url;
    }

    sub changeUrl
    {
        my ($self, $url) = @_;
        # Make sure the url is for the api, not the main movie page
        return $self->getItemUrl($url);
    }

    sub getNumberPasses
    {
        return 1;
    }

    sub getName
    {
        return "豆瓣";
    }


    # Compares the "lid" parameter of $url with the language code.
    # NOTE(review): siteLanguageCode() is not defined in this package;
    # presumably it comes from the base class - confirm.
    sub testURL
    {
        my ($self, $url) = @_;
        # List assignment leaves $id undefined when there is no match
        # instead of picking up a capture from an earlier match.
        my ($id) = $url =~ /[\?&]lid=([0-9]+)/;
        return ($id == $self->siteLanguageCode());
    }

    # Declares the fields provided by the result list.
    sub getReturnedFields
    {
        my $self = shift;

        $self->{hasField} = {
            title => 1,
            date => 1,
            director => 1,
            country => 1,
        };

    }

    sub getAuthor
    {
        return 'BW';
    }

    sub getLang
    {
        return 'ZH';
    }

    sub isPreferred
    {
        return 1;
    }

    sub getSearchCharset
    {
        my $self = shift;

        # Need urls to be double character encoded
        return "UTF-8";
    }
    sub getSearchFieldsArray
    {
        return ['imdb', 'title'];
    }

    sub getCharset
    {
        my $self = shift;

        return "UTF-8";
    }

    sub decodeEntitiesWanted
    {
        return 0;
    }

    sub siteLanguage
    {
        my $self = shift;

        return 'ZH';
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityEN.pm b/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityEN.pm new file mode 100644 index 0000000..2774b36 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityEN.pm @@ -0,0 +1,334 @@
package GCPlugins::GCfilms::GCFilmAffinityEN;

###################################################
#
# Copyright 2005-2007 Tian
# Edited 2009 by FiXx
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginFilmAffinityEN;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Ends the cast list being collected on a detail page: drops the
    # separator left after the last name and disarms the flag.
    sub _endCast
    {
        my ($self) = @_;

        $self->{curInfo}->{actors} =~ s/, $//
          if defined $self->{curInfo}->{actors};
        $self->{isActors} = 0;
    }

    # Opening-tag handler.
    #   $tagname - tag name
    #   $attr    - hash ref of the tag attributes
    # List mode: links to /en/film... start a new entry and styled
    # span/div tags announce its director and actors.  Once parsingEnded
    # is set (see text) a movie_id link gives the URL of the only item.
    # Detail mode: records country and picture and arms title and rating.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($self->{parsingEnded})
            {
                if (   ($tagname eq 'a')
                    && ($attr->{href} =~ /\/en\/.*\.php\?movie_id=([0-9]*)/))
                {
                    $self->{hasUrl} = 'film' . $1 . '.html';
                }
            }
            elsif (!$self->{isMovie}
                && ($tagname eq 'a')
                && ($attr->{href} =~ /^\/en\/(film.*)$/))
            {
                my $url = $1;
                $self->{isMovie} = 1;
                $self->{itemIdx}++;
                $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
            }
            elsif (($tagname eq 'span')
                && ($attr->{style} eq 'font-size: 10px; color:#666666'))
            {
                $self->{isDirector} = 1;
            }
            elsif (($tagname eq 'div')
                && ($attr->{style} eq 'font-size: 10px'))
            {
                $self->{isActors} = 1;
            }
        }
        else
        {
            if (   ($tagname eq 'span')
                && ($attr->{style} eq 'color:#990000; font-size:16; font-weight: bold;'))
            {
                $self->{isTitle} = 1;
            }
            elsif ($tagname eq 'img')
            {
                if ($attr->{src} =~ /^\/imgs\/countries/)
                {
                    $self->{curInfo}->{country} = $attr->{title};
                }
                elsif ($attr->{src} =~ /pics.*filmaffinity\.com\/.*-full\.jpg/)
                {
                    $self->{curInfo}->{image} = $attr->{src}
                      if not exists $self->{curInfo}->{image};
                }
            }
            elsif ($tagname eq 'a')
            {
                # A link to the large picture replaces any picture found
                # so far.
                if ($attr->{href} =~ /pics.*filmaffinity\.com\/.*-large\.jpg/)
                {
                    $self->{curInfo}->{image} = $attr->{href};
                }
            }
            elsif ($tagname eq 'td')
            {
                if ($attr->{style} =~ /font-size:22px; font-weight: bold;/)
                {
                    $self->{isRating} = 1;
                }
            }
        }
    }

    # Closing-tag handler: only keeps the nesting counters up to date.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # Text handler.
    # List mode: a page title that does not start with "Search for" is
    # taken as a detail page, so the list is reduced to a single item
    # whose URL is set by start().  Otherwise the chunks following a
    # movie link give title and year, and the flagged chunks give
    # director and actors.
    # Detail mode: the chunk is stored in the field whose flag is armed;
    # bold labels arm the flag for the chunk that follows.
    sub text
    {
        my ($self, $origtext) = @_;

        if ($self->{parsingList})
        {
            if ($self->{parsingEnded})
            {
                if ($self->{hasUrl})
                {
                    $self->{itemsList}[0]->{url} = $self->{hasUrl};
                    $self->{hasUrl} = 0;
                }
                return;
            }
            if ($self->{inside}->{title} && ($origtext !~ /^Search\s+for /))
            {
                $self->{parsingEnded} = 1;
                $self->{hasUrl} = 0;
                $self->{itemIdx} = 0;
            }
            elsif ($self->{isMovie})
            {
                return if $origtext !~ /\w/;
                return if $origtext eq 'Add to lists';
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                $self->{isMovie} = 0;
                $self->{isTitle} = 1;
            }
            elsif ($self->{isTitle})
            {
                (my $year = $origtext) =~ s/\s*\(([0-9]{4})\)\s*/$1/;
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $year;
                $self->{isTitle} = 0;
            }
            elsif ($self->{isDirector})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext;
                $self->{isDirector} = 0;
            }
            elsif ($self->{isActors})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $origtext;
                $self->{isActors} = 0;
            }
        }
        else
        {
            $origtext =~ s/^\s*//;

            return if !$origtext;
            if ($self->{isTitle})
            {
                $self->{curInfo}->{title} = $origtext;
                $self->{isTitle} = 0;
            }
            elsif ($self->{isOrig})
            {
                $self->{curInfo}->{original} = $origtext;
                $self->{isOrig} = 0;
            }
            elsif ($self->{isDate})
            {
                $self->{curInfo}->{date} = $origtext;
                $self->{isDate} = 0;
            }
            elsif ($self->{isTime})
            {
                $self->{curInfo}->{time} = $origtext;
                $self->{isTime} = 0;
            }
            elsif ($self->{isDirector})
            {
                $self->{curInfo}->{director} = $origtext;
                $self->{isDirector} = 0;
            }
            elsif ($self->{isActors})
            {
                # Only link texts are names; the flag stays armed until
                # the next section label is met.
                if ($self->{inside}->{a} && $origtext)
                {
                    $origtext =~ s/\n//g;
                    $self->{curInfo}->{actors} .= $origtext . ', ';
                }
            }
            elsif ($self->{isGenre})
            {
                $self->{curInfo}->{genre} = $origtext;
                $self->{curInfo}->{genre} =~ s/\s*\/\s*/,/g;
                $self->{isGenre} = 0;
            }
            elsif ($self->{isSynopsis})
            {
                $self->{curInfo}->{synopsis} = $origtext;
                $self->{isSynopsis} = 0;
            }
            elsif ($self->{isRating})
            {
                $origtext =~ s/,/\./; # replace comma
                $self->{curInfo}->{ratingpress} = int($origtext + 0.5);
                $self->{isRating} = 0;
            }

            if ($self->{inside}->{b})
            {
                # Section label => flag armed for the chunk(s) after it.
                my %flagFor = (
                    'ORIGINAL TITLE' => 'isOrig',
                    'YEAR'           => 'isDate',
                    'RUNNING TIME'   => 'isTime',
                    'DIRECTOR'       => 'isDirector',
                    'CAST'           => 'isActors',
                    'GENRE'          => 'isGenre',
                    'SYNOPSIS/PLOT'  => 'isSynopsis',
                );

                if (my $flag = $flagFor{$origtext})
                {
                    # Any label other than CAST closes the cast list.  It
                    # used to be closed by STUDIO/PRODUCER only, and an
                    # open cast list hides genre, synopsis and rating.
                    _endCast($self) if $flag ne 'isActors';
                    $self->{$flag} = 1;
                }
                elsif ($origtext eq 'STUDIO/PRODUCER')
                {
                    _endCast($self);
                }
            }

        }
    }

    # Constructor: builds the base parser and declares which fields the
    # result list provides.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title => 1,
            date => 1,
            director => 1,
            actors => 1,
        };

        $self->{isInfo} = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl} = undef;

        return $self;
    }

    # Clears the single-result marker and returns the HTML unchanged.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;

        return $html;
    }

    # URL of the title search for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.filmaffinity.com/en/search.php?"
          ."stext=$word&stype=title";
    }

    # Absolute URL of an item from the relative link found in the list.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return 'http://www.filmaffinity.com/en/' . $url;
    }

    sub changeUrl
    {
        my ($self, $url) = @_;

        return $url;
    }

    sub getName
    {
        return "Film affinity (EN)";
    }

    sub getCharset
    {
        my $self = shift;

        return "ISO-8859-1";
    }

    sub getAuthor
    {
        return 'Tian & PIN edited by FiXx';
    }

    sub getLang
    {
        return 'EN';
    }
}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityES.pm b/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityES.pm new file mode 100644 index 0000000..4c39ae5 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityES.pm @@ -0,0 +1,334 @@
package GCPlugins::GCfilms::GCFilmAffinityES;

###################################################
#
# Copyright 2005-2007 Tian
# Edited 2009 by FiXx
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginFilmAffinityES;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Ends the cast list being collected on a detail page: drops the
    # separator left after the last name and disarms the flag.
    sub _endCast
    {
        my ($self) = @_;

        $self->{curInfo}->{actors} =~ s/, $//
          if defined $self->{curInfo}->{actors};
        $self->{isActors} = 0;
    }

    # Opening-tag handler.
    #   $tagname - tag name
    #   $attr    - hash ref of the tag attributes
    # List mode: links to /es/film... start a new entry and styled
    # span/div tags announce its director and actors.  Once parsingEnded
    # is set (see text) a movie_id link gives the URL of the only item.
    # Detail mode: records country and picture and arms title and rating.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($self->{parsingEnded})
            {
                if (   ($tagname eq 'a')
                    && ($attr->{href} =~ /\/es\/.*\.php\?movie_id=([0-9]*)/))
                {
                    $self->{hasUrl} = 'film' . $1 . '.html';
                }
            }
            elsif (!$self->{isMovie}
                && ($tagname eq 'a')
                && ($attr->{href} =~ /^\/es\/(film.*)$/))
            {
                my $url = $1;
                $self->{isMovie} = 1;
                $self->{itemIdx}++;
                $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
            }
            elsif (($tagname eq 'span')
                && ($attr->{style} eq 'font-size: 10px; color:#666666'))
            {
                $self->{isDirector} = 1;
            }
            elsif (($tagname eq 'div')
                && ($attr->{style} eq 'font-size: 10px'))
            {
                $self->{isActors} = 1;
            }
        }
        else
        {
            if (   ($tagname eq 'span')
                && ($attr->{style} eq 'color:#990000; font-size:16; font-weight: bold;'))
            {
                $self->{isTitle} = 1;
            }
            elsif ($tagname eq 'img')
            {
                if ($attr->{src} =~ /^\/imgs\/countries/)
                {
                    $self->{curInfo}->{country} = $attr->{title};
                }
                elsif ($attr->{src} =~ /pics.*filmaffinity\.com\/.*-full\.jpg/)
                {
                    $self->{curInfo}->{image} = $attr->{src}
                      if not exists $self->{curInfo}->{image};
                }
            }
            elsif ($tagname eq 'a')
            {
                # A link to the large picture replaces any picture found
                # so far.
                if ($attr->{href} =~ /pics.*filmaffinity\.com\/.*-large\.jpg/)
                {
                    $self->{curInfo}->{image} = $attr->{href};
                }
            }
            elsif ($tagname eq 'td')
            {
                if ($attr->{style} =~ /font-size:22px; font-weight: bold;/)
                {
                    $self->{isRating} = 1;
                }
            }
        }
    }

    # Closing-tag handler: only keeps the nesting counters up to date.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # Text handler.
    # List mode: a page title that does not start with "Búsqueda de" is
    # taken as a detail page, so the list is reduced to a single item
    # whose URL is set by start().  Otherwise the chunks following a
    # movie link give title and year, and the flagged chunks give
    # director and actors.
    # Detail mode: the chunk is stored in the field whose flag is armed;
    # bold labels arm the flag for the chunk that follows.
    sub text
    {
        my ($self, $origtext) = @_;

        if ($self->{parsingList})
        {
            if ($self->{parsingEnded})
            {
                if ($self->{hasUrl})
                {
                    $self->{itemsList}[0]->{url} = $self->{hasUrl};
                    $self->{hasUrl} = 0;
                }
                return;
            }
            if ($self->{inside}->{title} && ($origtext !~ /^Búsqueda\s+de /))
            {
                $self->{parsingEnded} = 1;
                $self->{hasUrl} = 0;
                $self->{itemIdx} = 0;
            }
            elsif ($self->{isMovie})
            {
                return if $origtext !~ /\w/;
                return if $origtext eq 'Añadir a listas';
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                $self->{isMovie} = 0;
                $self->{isTitle} = 1;
            }
            elsif ($self->{isTitle})
            {
                (my $year = $origtext) =~ s/\s*\(([0-9]{4})\)\s*/$1/;
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $year;
                $self->{isTitle} = 0;
            }
            elsif ($self->{isDirector})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext;
                $self->{isDirector} = 0;
            }
            elsif ($self->{isActors})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $origtext;
                $self->{isActors} = 0;
            }
        }
        else
        {
            $origtext =~ s/^\s*//;

            return if !$origtext;
            if ($self->{isTitle})
            {
                $self->{curInfo}->{title} = $origtext;
                $self->{isTitle} = 0;
            }
            elsif ($self->{isOrig})
            {
                $self->{curInfo}->{original} = $origtext;
                $self->{isOrig} = 0;
            }
            elsif ($self->{isDate})
            {
                $self->{curInfo}->{date} = $origtext;
                $self->{isDate} = 0;
            }
            elsif ($self->{isTime})
            {
                $self->{curInfo}->{time} = $origtext;
                $self->{isTime} = 0;
            }
            elsif ($self->{isDirector})
            {
                $self->{curInfo}->{director} = $origtext;
                $self->{isDirector} = 0;
            }
            elsif ($self->{isActors})
            {
                # Only link texts are names; the flag stays armed until
                # the next section label is met.
                if ($self->{inside}->{a} && $origtext)
                {
                    $origtext =~ s/\n//g;
                    $self->{curInfo}->{actors} .= $origtext . ', ';
                }
            }
            elsif ($self->{isGenre})
            {
                $self->{curInfo}->{genre} = $origtext;
                $self->{curInfo}->{genre} =~ s/\s*\/\s*/,/g;
                $self->{isGenre} = 0;
            }
            elsif ($self->{isSynopsis})
            {
                $self->{curInfo}->{synopsis} = $origtext;
                $self->{isSynopsis} = 0;
            }
            elsif ($self->{isRating})
            {
                $origtext =~ s/,/\./; # replace comma
                $self->{curInfo}->{ratingpress} = int($origtext + 0.5);
                $self->{isRating} = 0;
            }

            if ($self->{inside}->{b})
            {
                # Section label => flag armed for the chunk(s) after it.
                my %flagFor = (
                    'TÍTULO ORIGINAL' => 'isOrig',
                    'AÑO'             => 'isDate',
                    'DURACIÓN'        => 'isTime',
                    'DIRECTOR'        => 'isDirector',
                    'REPARTO'         => 'isActors',
                    'GÉNERO'          => 'isGenre',
                    'SINOPSIS'        => 'isSynopsis',
                );

                if (my $flag = $flagFor{$origtext})
                {
                    # Any label other than REPARTO closes the cast list.
                    # It used to be closed by PRODUCTORA only, and an open
                    # cast list hides genre, synopsis and rating.
                    _endCast($self) if $flag ne 'isActors';
                    $self->{$flag} = 1;
                }
                elsif ($origtext eq 'PRODUCTORA')
                {
                    _endCast($self);
                }
            }

        }
    }

    # Constructor: builds the base parser and declares which fields the
    # result list provides.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title => 1,
            date => 1,
            director => 1,
            actors => 1,
        };

        $self->{isInfo} = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl} = undef;

        return $self;
    }

    # Clears the single-result marker and returns the HTML unchanged.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;

        return $html;
    }

    # URL of the title search for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.filmaffinity.com/es/search.php?"
          ."stext=$word&stype=title";
    }

    # Absolute URL of an item from the relative link found in the list.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return 'http://www.filmaffinity.com/es/' . $url;
    }

    sub changeUrl
    {
        my ($self, $url) = @_;

        return $url;
    }

    sub getName
    {
        return "Film affinity (ES)";
    }

    sub getCharset
    {
        my $self = shift;

        return "ISO-8859-1";
    }

    sub getAuthor
    {
        return 'Tian & PIN edited by FiXx';
    }

    sub getLang
    {
        return 'ES';
    }
}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCFilmUP.pm b/lib/gcstar/GCPlugins/GCfilms/GCFilmUP.pm new file mode 100644 index 0000000..8a47dff --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCFilmUP.pm @@ -0,0 +1,252 @@
package GCPlugins::GCfilms::GCFilmUP;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
#use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginFilmUP;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    sub getSearchUrl
    {
        my ($self, $word) = @_;
        my $url;

        $url =
          "http://filmup.leonardo.it/cgi-bin/search.cgi?"
          . "ps=10&fmt=long&q=$word"
          .
"&ul=%25%2Fsc_%25&x=52&y=7&m=all&wf=2221&wm=wrd&sy=0"; + + return $url; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url unless $url eq ''; + return 'http://filmup.leonardo.it/'; + } + + sub getName + { + return "FilmUP"; + } + + sub getAuthor + { + return 'Tian'; + } + + sub getLang + { + return 'IT'; + } + + sub changeUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq 'a') + { + if ($self->{insideInfos}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $self->{lasUrl}; + $self->{insideInfos} = 0; + } + + $self->{lasUrl} = $attr->{href}; + } + } + else + { + if ($tagname eq 'img') + { + $self->{curInfo}->{image} = $self->getItemUrl . $attr->{src} + if $attr->{src} =~ /^locand\// && ($attr->{src} ne 'locand/no.gif'); + } + } + } + + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{inside}->{dt} && $self->{inside}->{a}) + { + if ($origtext =~ m/FilmUP - Scheda: (.*)/) + { + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $1; + $self->{insideInfos} = 1; + } + } + if ( $self->{inside}->{small} + && $self->{inside}->{table} + && $self->{insideInfos}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1 + if $origtext =~ /Anno: ([0-9]+)/; + $self->{itemsList}[ $self->{itemIdx} ]->{director} = $1 + if $origtext =~ /Regia: (.*?)((Sito)|$)/; + $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $1 + if $origtext =~ /Cast: (.*?)$/; + } + } + else + { + if ($self->{inside}->{h1}) + { + $self->{curInfo}->{title} = $origtext; + } + elsif ($self->{inside}->{td} && ($origtext !~ /^[\r\n]+$/)) + { + $self->{insideTime} = 0 if $origtext =~ /Regia:/; + if ($self->{insideOriginal}) + { + $self->{curInfo}->{original} = $origtext; + 
$self->{insideOriginal} = 0; + } + elsif ($self->{insideNat}) + { + $self->{curInfo}->{country} = $origtext; + $self->{insideNat} = 0; + } + elsif ($self->{insideDate}) + { + $self->{curInfo}->{date} = $origtext; + $self->{insideDate} = 0; + } + elsif ($self->{insideGenre}) + { + if (!$self->{curInfo}->{genre}) + { + $origtext =~ s|/|,|; + $self->{curInfo}->{genre} = $origtext; + } + $self->{insideGenre} = 0; + } + elsif ($self->{insideTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{insideTime} = 0; + } + elsif ($self->{insideDirector}) + { + $self->{curInfo}->{director} = $origtext; + $self->{insideDirector} = 0; + } + elsif ($self->{insideActors}) + { + $self->{curInfo}->{actors} = $origtext; + $self->{insideActors} = 0; + } + + $self->{insideOriginal} = 1 if $origtext =~ /Titolo originale:/; + $self->{insideNat} = 1 if $origtext =~ /Nazione:/; + $self->{insideDate} = 1 if $origtext =~ /Anno:/; + $self->{insideGenre} = 1 if $origtext =~ /Genere:/; + $self->{insideTime} = 1 if $origtext =~ /Durata:/; + $self->{insideDirector} = 1 if $origtext =~ /Regia:/; + $self->{insideActors} = 1 if $origtext =~ /Cast:/; + } + if ($self->{inside}->{synopsis}) + { + $self->{curInfo}->{synopsis} = $origtext; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + + $self->{hasField} = { + title => 1, + date => 1, + director => 1, + actors => 1, + }; + + bless($self, $class); + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $html =~ s/\222/'/g; + + $html =~ s{<font face="arial, helvetica" size="3">(.*?)</font>} + {<h1>$1</h1>}g; + $html =~ s{</table>.<br>.<font face="arial, helvetica" size="2">(.*?)</font>} + {</table><synopsis>$1</synopsis>}ms; + $html =~ s{<font face="arial, helvetica" size="2">Trama:(.*?)</font>} + {<synopsis>$1</synopsis>}; + $html =~ s{Trama:<br>}{}; + $html =~ s{<span .*?>|</span>} {}g; + $html =~ s{<a .*?href="\/?personaggi.*?>(.+?)</a>} {$1}g; + + $html 
=~ s{<font .*?>|</font>} {}g; + $html =~ s{</?b>} {}g; + + return $html; + } + + sub getCharset + { + my $self = shift; + + return "Windows-1252"; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCFilmWeb.pm b/lib/gcstar/GCPlugins/GCfilms/GCFilmWeb.pm new file mode 100644 index 0000000..f7c18cd --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCFilmWeb.pm @@ -0,0 +1,369 @@ +package GCPlugins::GCfilms::GCFilmWeb; + +################################################### +# +# Copyright 2005-2010 Tian, Michael Mayer +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginFilmWeb; + + use LWP::Simple qw($ua); + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($self->{parsingEnded}) + { + if ( ($tagname eq 'input') + && ($attr->{name} eq 'id')) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{title} = ''; + $self->{itemsList}[0]->{url} = + 'http://www.filmweb.pl/Film?id=' . 
$attr->{value}; + } + } + + if ($tagname eq 'a') + { + if ($attr->{class} eq 'searchResultTitle') + { + $self->{isMovie} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href}; + } + elsif ($attr->{href} =~ m|/search/film\?countryIds=|) + { + $self->{isCountry} = 1; + } + } + elsif ($tagname eq 'span') + { + if ($attr->{class} eq 'searchResultDetails') + { + $self->{isYear} = 1; + } + } + } + else + { + return if ($self->{parsingEnded}); + + if ($tagname eq 'strong') + { + if ($attr->{class} eq "rating") + { + $self->{isRating} = 1; + } + } + elsif ($tagname eq 'div') + { + if ($attr->{class} eq "time") + { + $self->{isTime} = 1; + } + elsif ($attr->{class} eq "posterLightbox") + { + $self->{isImage} = 1; + } + elsif ($attr->{class} =~ /castListWrapper/) + { + $self->{isCast} = 1; + } + elsif ($attr->{class} =~ /additional-info/) + { + $self->{parsingEnded} = 1; + } + } + elsif ($tagname eq 'span') + { + if ($attr->{class} eq 'filmDescrBg') + { + $self->{isSynopsis} = 1; + } + } + elsif (($tagname eq 'a') && $self->{isImage}) + { + # big image + $self->{curInfo}->{image} = $attr->{href}; + } + elsif (($tagname eq 'img') && $self->{isImage}) + { + # small image + $self->{curInfo}->{image} = $attr->{src} + if (!$self->{bigPics}); + $self->{isImage} = 0; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + if ($tagname eq "tr") + { + $self->{key} = ""; + } + + } + + sub text + { + my ($self, $origtext) = @_; + + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + + return if !$origtext; + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + + if ($self->{isMovie}) + { + if ($self->{inside}->{a}) + { + my $title; + my $original; + ($title, $original) = split (/\s*\/\s*/, $origtext, 2); + return if !$title; + + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $title; + $self->{itemsList}[ $self->{itemIdx} ]->{original} = $original; + $self->{isMovie} = 0; + } + } + elsif 
($self->{isYear}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1 + if $origtext =~ /([0-9]{4})/; + $self->{isYear} = 0; + } + elsif ($self->{isCountry}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{country} .= + $self->{itemsList}[ $self->{itemIdx} ]->{country} ? + ", " . $origtext + : $origtext; + $self->{isCountry} = 0; + } + } + else + { + + if ($self->{inside}->{title}) + { + # content of title field is formatted like this: + # Obcy - 8. pasażer "Nostromo" / Alien (1979) - Filmweb + # or (if polish title and original title are identical): + # Batman (1989) - Filmweb + $origtext =~ m|(.*)\s+\((\d{4})\)\s+-\s+Filmweb|; + $self->{curInfo}->{date} = $2; + ($self->{curInfo}->{original}, + $self->{curInfo}->{title}) = split (/\s+\/\s+/, $1, 2); + if (!$self->{curInfo}->{title}) + { + $self->{curInfo}->{title} = $self->{curInfo}->{original}; + } + } + elsif ($self->{isRating}) + { + $origtext =~ s/,/\./; + $self->{curInfo}->{ratingpress} = int ($origtext + 0.5); + $self->{isRating} = 0; + } + elsif ($self->{isSynopsis}) + { + $self->{curInfo}->{synopsis} = $origtext; + $self->{isSynopsis} = 0; + } + elsif ($self->{inside}->{th}) + { + $self->{key} = $origtext; + } + elsif ($self->{inside}->{td} && $self->{inside}->{a}) + { + if ($self->{key} eq "reżyseria:") + { + $self->{curInfo}->{director} .= + $self->{curInfo}->{director} ? ", " . $origtext : $origtext; + } + if ($self->{key} eq "produkcja:") + { + $self->{curInfo}->{country} .= + $self->{curInfo}->{country} ? ", " . $origtext : $origtext; + } + if ($self->{key} eq "gatunek:") + { + $self->{curInfo}->{genre} .= + $self->{curInfo}->{genre} ? ", " . 
$origtext : $origtext; + } + } + elsif ($self->{isCast}) + { + if ($self->{inside}->{h3}) + { + push @{$self->{curInfo}->{actors}}, [$origtext] + if ($self->{actorsCounter} < + $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS); + $self->{actorsCounter}++; + $self->{isRole} = 1; + } + else + { + if ($self->{isRole} + && ($self->{actorsCounter} <= + $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS)) + { + # As we incremented it above, we have one more + # chance here to add a role Without <= we would skip + # the role for last actor + push @{$self->{curInfo}->{actors}->[ $self->{actorsCounter}-1 ]}, $origtext + } + $self->{isRole} = 0; + } + } + elsif ($self->{isTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{isTime} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + original => 1, + country => 1, + }; + + $self->{isMovie} = 0; + $self->{isYear} = 0; + $self->{isCountry} = 0; + $self->{curName} = undef; # why? + $self->{curUrl} = undef; # why? + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + + if ($self->{parsingList}) + { + $html =~ s|</?b>||gms; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + # Grab the home page first to receive a fresh, valid cookie + my $response = $ua->get('http://www.filmweb.pl/'); + + return "http://www.filmweb.pl/search?q=$word&alias=film"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url if $url =~ /^http:/; + return "http://www.filmweb.pl" . 
$url; + } + + sub changeUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return 'FilmWeb'; + } + + sub getExtra + { + return ''; + } + + + sub getCharset + { + my $self = shift; + + return 'ISO-8859-2'; + } + + sub getAuthor + { + return 'Tian'; + } + + sub getLang + { + return 'PL'; + } + + sub getDefaultPictureSuffix + { + return '.jpg'; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCIbs.pm b/lib/gcstar/GCPlugins/GCfilms/GCIbs.pm new file mode 100644 index 0000000..2cb141d --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCIbs.pm @@ -0,0 +1,409 @@ +package GCPlugins::GCfilms::GCIbs; +################################################### +# +# Copyright 2008 t-storm +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginIbs; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + return; + if ($tagname eq "a") + { + if ($attr->{href} =~ m/mymovies\/list\?pending\&add=([0-9]*)/) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = '/title/tt' . $1 . 
'/'; + } + } + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + my $url = $attr->{href}; + if ( ($url =~ /^http:\/\/www.ibs.it\/dvd\/[0-9]+\//) + && (!$self->{alreadyListed}->{$url})) + { + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + $self->{alreadyListed}->{$url} = 1; + } + } + elsif ($tagname eq 'td') + { + if ($attr->{class} eq 'ttitolettobianco') + { + $self->{isYear} = 1; + $self->{isMovie} = 0; + } + } + } + else + { + if ($tagname eq "a") + { + $self->{currentHref} = $attr->{href}; + + if ($attr->{href} =~ +m/javascript:Jackopen\('http:\/\/giotto.internetbookshop.it\/cop\/copdjc.asp\?e=([0-9]+)'\)/ + ) + { + $self->{curInfo}->{image} = + "http://giotto.internetbookshop.it/cop/copdjc.asp?e=$1"; + } + if ($attr->{href} =~ m/^\/film\/regista\//) + { + $self->{insideDirector} = 1; + } + elsif ($attr->{href} =~ m/^\/film\/attore\//) + { + $self->{insideActors} = 1; + $self->{insideRoles} = 0; + $self->{insideDirector} = 0; + } + else + { + $self->{insideSynopsis} = 0 if ($attr->{href} =~ m/plotsummary/); + $self->{insideGenre} = 1 + if ($attr->{href} =~ m|/Sections/Genres/|) + && !($self->{curInfo}->{synopsis} + || $self->{curInfo}->{country} + || $self->{curInfo}->{time}); + } + } + elsif ($tagname eq 'td') + { + if ($attr->{class} eq 'lbarrasup') + { + $self->{isMovie} = 1; + $self->{insideSynopsis} = 0; + } + } + elsif ($tagname eq "SPAN") + { + if ($self->{inside}->{langue}) + { + $self->{inside}->{langueLANG} = 1; + $self->{inside}->{langueCODEC} = 0; + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + if ($tagname eq "SPAN") + { + if ($self->{inside}->{langue}) + { + $self->{inside}->{langueLANG} = 0; + $self->{inside}->{langueCODEC} = 1; + } + } + + $self->{inside}->{$tagname}--; + + } + + sub text + { + my ($self, $origtext) = @_; + + return if length($origtext) < 2; + + $origtext =~ s/"/"/g; + $origtext =~ s/³/3/g; + $origtext =~ 
s/&#[0-9]*;//g; + $origtext =~ s/\n//g; + + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + if ($self->{isMovie}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext; + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $self->{listDate}; + $self->{isMovie} = 0; + $self->{isInfo} = 1; + return; + } + if ($self->{isYear}) + { + $origtext =~ /([0-9]+)/; + $self->{listDate} = $1; + $self->{isYear} = 0; + } + if ($self->{isDirector}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext; + $self->{isMovie} = 0; + $self->{isInfo} = 0; + $self->{isDirector} = 0; + return; + } + $self->{isDirector} = 1 if $origtext =~ m/Regia di /; + } + else + { + $self->{inside}->{langue} = 0 if $origtext =~ m/Lingua sottotitoli/; + if ($self->{insideGenre}) + { + $origtext =~ s/\s*$//; + $self->{curInfo}->{genre} .= $self->capWord($origtext) . ','; + $self->{curInfo}->{genre} =~ s|\s*/\s*|,|g; + $self->{insideGenre} = 0; + } + elsif ($self->{insideDirector}) + { + $self->{curInfo}->{director} = $origtext; + $self->{insideDirector} = 0; + } + elsif ($self->{insideSynopsis}) + { + ($self->{curInfo}->{synopsis} .= $origtext) =~ s/^\s*//; + $self->{insideSynopsis} = 0; + } + elsif ($self->{isCountry}) + { + $origtext =~ /(.+), (.+)/; + $self->{curInfo}->{country} .= $1; + $self->{curInfo}->{date} = $2; + $self->{isCountry} = 0; + } + elsif ($self->{insideTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{curInfo}->{time} =~ s/.*?://; + $self->{insideTime} = 0; + } + elsif ($self->{insideActors}) + { + push @{$self->{curInfo}->{actors}}, [$origtext] + if ($self->{actorsCounter} < + $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS); + $self->{actorsCounter}++; + $self->{insideActors} = 0; + } + elsif ($self->{insideRoles}) + { + # As we incremented it above, we have one more chance here to add a role + # Without <= we would skip the role for last actor + push @{$self->{curInfo}->{actors}->[ $self->{actorsCounter} - 1 ]}, + $origtext + 
if ($self->{actorsCounter} <= + $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS); + $self->{insideRoles} = 0; + } + elsif ($self->{inside}->{langue}) + { + if ($self->{inside}->{span}) + { + $self->{curInfo}->{language} = $origtext; + } + else + { + $origtext =~ s/^, //; + $origtext =~ s/ - $//; + push @{$self->{curInfo}->{audio}}, + [ $self->{curInfo}->{language}, $origtext ]; + } + } + elsif ($self->{inside}->{soustitre}) + { + my @sottotitoli = split(' - ', $origtext); + my $subss; + foreach $subss (@sottotitoli) + { + push @{$self->{curInfo}->{subt}}, [$subss]; + } + + $self->{inside}->{soustitre} = 0; + } + elsif ($self->{isMovie}) + { + + if ($self->{isMovie1}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isMovie1} = 0; + } + elsif ($self->{isMovie2}) + { + $self->{curInfo}->{original} = $origtext; + $self->{isMovie} = 0; + $self->{isMovie2} = 0; + } + } + else + { + if ($origtext =~ m{User\s+Rating:\s+(\d+\.\d+)/10\s+}) + { + $self->{curInfo}->{ratingpress} = int($1 + 0.5); + } + ; # if + } + ; # if + + if ($origtext eq "Titolo") + { + $self->{isMovie1} = 1; + $self->{isMovie2} = 0; + } + elsif ($origtext eq "Titolo originale") + { + $self->{isMovie1} = 0; + $self->{isMovie2} = 1; + } + elsif ($origtext eq "Paese, Anno") + { + $self->{isCountry} = 1; + } + elsif ($origtext eq "Dati tecnici") + { + $self->{insideTime} = 1; + } + elsif ($origtext eq "Genere") + { + $self->{insideGenre} = 1; + } + elsif ($origtext eq "Descrizione") + { + $self->{insideSynopsis} = 1; + } + elsif ($origtext =~ m/Vietato ai minori di ([0-9]+) anni/) + { + $self->{curInfo}->{age} = $1; + } + elsif ($origtext eq "Lingua audio") + { + $self->{inside}->{langue} = 1; + } + elsif ($origtext eq "Lingua sottotitoli") + { + $self->{inside}->{langue} = 0; + $self->{inside}->{soustitre} = 1; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 
1, + director => 1, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + $html =~ s/""/'"/g; + $html =~ s/""/"'/g; + $html =~ s|</a></b><br>|</a><br>|; + $html =~ s{</?(?:b|small)>}{}gi; + + if ($self->{parsingList}) + { + $self->{alreadyListed} = {}; + } + else + { + $html =~ s|<a href="synopsis">[^<]*</a>||gi; + $html =~ s|<a href="/name/.*?">([^<]*)</a>|$1|gi; + $html =~ s|<a href="/character/ch[0-9]*/">([^<]*)</a>|$1|gi; + #$html =~ s|<a href="/Sections/.*?">([^<]*)</a>|$1|gi; + $self->{curInfo}->{actors} = []; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.ibs.it/dvd/ser/serpge.asp?ty=kw&dh=100&SEQ=Q&T=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url if $url =~ /^http:/; + return "http://www.ibs.it" . $url; + } + + sub getName + { + return "Internet Bookshop"; + } + + sub getAuthor + { + return 't-storm'; + } + + sub getLang + { + return 'IT'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm b/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm new file mode 100644 index 0000000..70af804 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm @@ -0,0 +1,439 @@ +package GCPlugins::GCfilms::GCImdb; + +################################################### +# +# Copyright 2010 groms +# +# Features: +# + Multiple directors separated by comma +# + Multiple countries separated by comma +# + Correct URL in case of redirection +# + Fetches Original Title +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + package GCPlugins::GCfilms::GCPluginImdb; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + my $url = $attr->{href}; + if (($url =~ /^\/title\//) && (!$self->{alreadyListed}->{$url})) + { + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $url; + $self->{alreadyListed}->{$url} = 1; + } + } + } + else + { + + if ($tagname eq "link") + { + if ($attr->{rel} eq "canonical") + { + $self->{curInfo}->{webPage} = $attr->{href}; + } + } + elsif ($tagname eq "h1") + { + if ($attr->{class} eq "header") + { + $self->{insideHeader} = 1; + } + } + elsif ($tagname eq "div") + { + if ($attr->{class} eq "infobar") + { + $self->{insideInfobar} = 1; + } + } + elsif ($tagname eq "table") + { + if ($attr->{class} eq "cast_list") + { + $self->{insideCastList} = 1; + } + } + elsif ($tagname eq "span") + { + if ($attr->{itemprop} eq "ratingValue") + { + $self->{insideRating} = 1; + } + elsif ($attr->{class} eq "title-extra") + { + $self->{insideOriginalTitle} = 1; + } + } + elsif ($tagname eq "img") + { + if ($self->{insidePrimaryImage}) + { + if (!($attr->{src} =~ m/nopicture/)) + { + ($self->{curInfo}->{image} = $attr->{src}) =~ 
s/_V1\._.+\./_V1\._SX1000_SY1000_\./; + } + } + elsif ($self->{insideInfobar} && $attr->{src} =~ m|/certificates/us/|) + { + my $cert = $attr->{title}; + $self->{curInfo}->{age} = 1 if ($cert eq 'Unrated') || ($cert eq 'Open'); + $self->{curInfo}->{age} = 2 if ($cert eq 'G') || ($cert eq 'Approved'); + $self->{curInfo}->{age} = 5 if ($cert eq 'PG') || ($cert eq 'M') || ($cert eq 'GP'); + $self->{curInfo}->{age} = 13 if $cert eq 'PG_13'; + $self->{curInfo}->{age} = 17 if $cert eq 'R'; + $self->{curInfo}->{age} = 18 if ($cert eq 'NC_17') || ($cert eq 'X'); + } + } + elsif ($tagname eq "a") + { + if ($self->{insideHeader} && $attr->{href} =~ m/year/) + { + $self->{insideYear} = 1; + } + elsif ($self->{insideInfobar} && $attr->{href} =~ m/genre/) + { + $self->{insideGenre} = 1; + } + } + elsif ($tagname eq 'td') + { + if ($self->{insideCastList}) + { + if ($attr->{class} eq 'name') + { + $self->{insideActor} = 1; + } + elsif ($attr->{class} eq 'character') + { + $self->{insideRole} = 1; + } + } + elsif ($attr->{id} eq "img_primary") { + $self->{insidePrimaryImage} = 1; + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + if ($self->{parsingList}) + { + if ($self->{isMovie} && ($tagname eq 'a')) + { + $self->{isMovie} = 0; + my $url = $self->{itemsList}[$self->{itemIdx}]->{url}; + if (!$self->{itemsList}[$self->{itemIdx}]->{title}) + { + $self->{alreadyListed}->{$url} = 0; + $self->{itemIdx}--; + } + } + } else { + if ($tagname eq "h1") + { + $self->{insideHeader} = 0; + } + elsif ($tagname eq "a") + { + $self->{insideYear} = 0; + $self->{insideGenre} = 0; + $self->{insideActor} = 0; + $self->{insideRole} = 0; + } + elsif ($tagname eq "div") + { + $self->{insideInfobar} = 0; + $self->{insideNat} = 0; + $self->{insideDirector} = 0; + $self->{insideStoryline} = 0; + $self->{insideReleaseDate} = 0; + } + elsif ($tagname eq "span") + { + $self->{insideRating} = 0; + $self->{insideOriginalTitle} = 0; + } + elsif ($tagname eq 
"table") + { + $self->{insideCastList} = 0; + } + elsif ($tagname eq "td") + { + $self->{insidePrimaryImage} = 0; + } + elsif ($self->{insideCastList}) + { + if ($self->{actor} && $self->{role}) + { + $self->{actor} =~ s/^\s+|\s+$//g; + $self->{actor} =~ s/\s{2,}/ /g; + push @{$self->{curInfo}->{actors}}, [$self->{actor}]; + $self->{role} =~ s/^\s+|\s+$//g; + $self->{role} =~ s/\s{2,}/ /g; + push @{$self->{curInfo}->{actors}->[$self->{actorsCounter}]}, $self->{role}; + $self->{actorsCounter}++; + } + $self->{actor} = ""; + $self->{role} = ""; + } + } + } + + sub text + { + my ($self, $origtext) = @_; + + return if length($origtext) < 2; + + $origtext =~ s/^\s+|\s+$//g; + + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + if ($self->{inside}->{h1} && $origtext !~ m/IMDb\s*Title\s*Search/i) + { + $self->{parsingEnded} = 1; + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + if ($self->{isMovie}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isMovie} = 0; + $self->{isInfo} = 1; + return; + } + if ($self->{isInfo}) + { + $self->{itemsList}[$self->{itemIdx}]->{date} = $1 if $origtext =~ m|\(([0-9]*)(/I+)?\)|; + $self->{isInfo} = 0; + } + } + else + { + if ($self->{insideHeader}) + { + if ($self->{insideYear}) + { + $self->{curInfo}->{date} = $origtext; + } + elsif (!$self->{curInfo}->{title}) + { + $self->{curInfo}->{title} = $origtext; + if (!$self->{curInfo}->{original}) + { + $self->{curInfo}->{original} = $origtext; + } + } + elsif ($self->{insideOriginalTitle} && !$self->{inside}->{i}) + { + $self->{curInfo}->{original} = $origtext; + } + } + elsif ($self->{insideInfobar}) + { + if ($self->{insideGenre}) + { + if ($self->{curInfo}->{genre}) + { + $self->{curInfo}->{genre} .= ","; + } + $self->{curInfo}->{genre} .= $origtext; + } + elsif ($origtext =~ m/([0-9]+ min)/) + { + $self->{curInfo}->{time} = $1; + } + } + elsif ($self->{insideRating} && $origtext =~ m/[0-9]\.[0-9]/) + { + 
$self->{curInfo}->{ratingpress} = int($origtext + 0.5); + } + elsif ($self->{insideSynopsis}) + { + $self->{curInfo}->{synopsis} .= $origtext; + } + elsif ($self->{insideNat}) + { + if ($origtext =~ m/[^\s].+/) + { + if ($self->{curInfo}->{country} =~ m/.+/) + { + $self->{curInfo}->{country} .= ", ".$origtext; + } + else + { + $self->{curInfo}->{country} = $origtext; + } + } + } + elsif ($self->{insideCastList}) + { + if ($self->{insideActor}) + { + $self->{actor} .= $origtext; + } + elsif ($self->{insideRole}) + { + $self->{role} .= $origtext; + } + } + elsif ($self->{insideStoryline} && $self->{inside}{p}) + { + $self->{curInfo}->{synopsis} = $origtext; + $self->{insideStoryline} = 0; + } + elsif ($self->{insideDirector} && $self->{inside}->{div}) + { + $origtext =~ s/,/, /; + $self->{curInfo}->{director} .= $origtext; + } + elsif ($self->{insideReleaseDate} && !$self->{curInfo}->{date}) { + if ($origtext =~ m/([0-9]{4})/) + { + $self->{curInfo}->{date} = $1; + $self->{insideReleaseDate} = 0; + } + } + + if ($self->{inside}->{h2}) + { + $self->{insideStoryline} = 1 if ($origtext eq "Storyline"); + } + elsif ($self->{inside}->{h4}) + { + $self->{insideDirector} = 1 if $origtext =~ m/Directors?:/; + $self->{insideTime} = 1 if $origtext =~ m/Runtime:/; + $self->{insideNat} = 1 if $origtext =~ m/Country:/; + $self->{insideReleaseDate} = 1 if $origtext =~ m/Release Date:/; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + if ($self->{parsingList}) + { + $self->{alreadyListed} = {}; + } + else + { + #$html =~ s|<a href="synopsis">[^<]*</a>||gi; + #$html =~ s|<a 
href="/name/.*?"[^>]*>([^<]*)</a>|$1|gi; + #$html =~ s|<a href="/character/ch[0-9]*/">([^<]*)</a>|$1|gi; + #$html =~ s|<a href="/Sections/.*?">([^<]*)</a>|$1|gi; + + # Commented out this line, causes bug #14420 when importing from named lists + #$self->{curInfo}->{actors} = []; + } + + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.imdb.com/find?s=tt&q=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://www.imdb.com" if $url eq ""; + return $url if $url =~ /^http:/; + return "http://www.imdb.com".$url; + } + + sub getName + { + return "IMDb"; + } + + sub getAuthor + { + return 'groms'; + } + + sub getLang + { + return 'EN'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm b/lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm new file mode 100644 index 0000000..d950395 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm @@ -0,0 +1,386 @@ +package GCPlugins::GCfilms::GCKinopoisk; + +use strict; +use utf8; +use Encode qw(encode); + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + package GCPlugins::GCfilms::GCPluginKinopoisk; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + if ($self->{parsingEnded}) + { + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + if ($attr->{class} eq "all") + { + my $url = $attr->{href}; + if ($url =~ m/\/level\/1\/film/) + { + $self->{isMovie} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $url; + } + } + if ($attr->{class} eq "orange") + { + $self->{isYear} = 1; + } + } + elsif ($tagname eq "title") + { + $self->{insideHTMLtitle} = 1; + } + } + else + { + if ($attr->{class} eq "moviename-big" && $attr->{style} eq "margin: 0; padding: 0") + { + $self->{insideTitle} = 1; + } + elsif ($tagname eq "span") + { + if ($attr->{style} eq "color: #666; font-size: 13px") + { + 
$self->{insideOriginal} = 1; + } + elsif ($attr->{class} eq "_reachbanner_" && $self->{insideSynopsis} == 0) + { + $self->{insideSynopsis} = 1; + } + } + elsif ($tagname eq "a") + { + if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Byear\%5D/) + { + $self->{insideDate} = 1; + } + if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Bcountry\%5D/) + { + if ($self->{isCountry} >= 2) + { + $self->{insideCountry} = 1; + $self->{isCountry}++; + } + } + if ($attr->{href} =~ m/\/level\/4\/people/) + { + if ($self->{isDirector} >= 2) + { + $self->{insideDirector} = 1; + $self->{isDirector}++; + } + } + if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Bgenre\%5D/) + { + $self->{insideGenre} = 1; + $self->{isGenre}++; + } + if ($self->{insideActorList}) + { + $self->{isActors} += 1; + $self->{insideActors} = 1; + } + } + elsif ($tagname eq "td") + { + if ($attr->{class} eq "type") + { + $self->{isDirector} = 1; + $self->{isTime} = 1; + $self->{isCountry} = 1; + } + elsif ($self->{isTime} == 2) + { + $self->{insideTime} = 1; + $self->{isTime} = 0; + } + elsif ($attr->{style} eq "vertical-align: top; height: 15px" && $attr->{align} eq "right" && $self->{isActors} >= 0) + { + $self->{isActors} += 1; + $self->{insideActors} = 1; + } + } + elsif ($tagname eq "img" && $attr->{style} eq "border: none; border-left: 10px #f60 solid") + { + if ($attr->{src} ne "/images/image_none.gif") + { + $self->{curInfo}->{image} = "http://www.kinopoisk.ru".$attr->{src}; + } + } + } + } + + sub text + { + my ($self, $origtext) = @_; + return if ($self->{parsingEnded}); + if ($self->{parsingList}) + { + if (($self->{insideHTMLtitle})) + { + if ($origtext =~ m/Результаты\sпоиска/) + { + # + } + else + { + $self->{parsingEnded} = 1; + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + $self->{insideHTMLtitle} = 0; + } + if ($self->{isMovie}) + { + my ($title, $date); + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isMovie} = 0; + return; + } + elsif 
($self->{isYear}) + { + $self->{itemsList}[$self->{itemIdx}]->{date} = $origtext; + $self->{isYear} = 0; + return; + } + } + else + { + if ($origtext =~ m/В\s*главных\s*ролях:/) + { + $self->{insideActorList} = 1; + } + if ($origtext =~ m/Роли\s*дублировали:/) + { + $self->{insideActorList} = 0; + } + if ($self->{insideTitle}) + { + $origtext =~ s/\s+$//; + $self->{curInfo}->{title} = $origtext; + $self->{insideTitle} = 0; + } + elsif ($self->{insideOriginal}) + { + $origtext =~ s/^\s+//; + $self->{curInfo}->{original} = $origtext; + $self->{insideOriginal} = 0; + } + elsif ($self->{insideDate}) + { + $self->{curInfo}->{date} = $origtext; + $self->{insideDate} = 0; + } + elsif ($self->{insideCountry} == 1) + { + if ($self->{isCountry} == 3) + { + $self->{curInfo}->{country} = $origtext; + } + elsif ($self->{isCountry} > 3) + { + $self->{curInfo}->{country} = $self->{curInfo}->{country}.", ".$origtext; + } + $self->{insideCountry} = 0; + } + elsif ($self->{insideDirector}) + { + if ($self->{isDirector} == 3) + { + $self->{curInfo}->{director} = $origtext; + } + elsif ($self->{isDirector} > 3) + { + $self->{curInfo}->{director} = $self->{curInfo}->{director}.", ".$origtext; + } + $self->{insideDirector} = 0; + } + elsif ($self->{insideActors}) + { + if ($self->{isActors} == 1) + { + $self->{curInfo}->{actors} = $origtext; + } + elsif ($self->{isActors} > 1) + { + if ($origtext eq "...") + { + $self->{isActors} = -1; + } + else + { + $self->{curInfo}->{actors} = $self->{curInfo}->{actors}.", ".$origtext; + } + } + $self->{insideActors} = 0; + } + elsif ($self->{insideSynopsis} == 1) + { + #$origtext =~ s/^\s+//; + $self->{curInfo}->{synopsis} = $origtext; + $self->{insideSynopsis} = 2; + } + elsif ($self->{isTime} == 1 || $self->{isDirector} == 1 || $self->{isCountry} == 1) + { + $self->{isDirector} = 0; + $self->{isTime} = 0; + $self->{isCountry} = 0; + if ($origtext eq "время") + { + $self->{isTime} = 2; + } + elsif ($origtext eq "режиссер") + { + 
$self->{isDirector} = 2; + } + elsif ($origtext eq "страна") + { + $self->{isCountry} = 2; + } + } + elsif ($self->{insideTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{insideTime} = 0; + } + elsif ($self->{insideGenre}) + { + if ($self->{isGenre} == 1) + { + $self->{curInfo}->{genre} = $origtext; + } + elsif ($self->{isGenre} > 1) + { + $self->{curInfo}->{genre} = $self->{curInfo}->{genre}.", ".$origtext; + } + $self->{insideGenre} = 0; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + if ($self->{parsingList}) + { + # Your code for processing search results here + } + else + { + if ($tagname eq "tr" && $self->{isDirector} >= 2) + { + $self->{isDirector} = 0; + } + elsif ($tagname eq "tr" && $self->{isGenre} != 0) + { + $self->{isGenre} = 0; + } + elsif ($tagname eq "td") + { + $self->{insideActorList} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{isYear} = 0; + $self->{isDirector} = 0; + $self->{isActors} = 0; + $self->{isTime} = 0; + $self->{isGenre} = 0; + $self->{isCountry} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + $self->{insideActorList} = 0; + return $self; + } + + sub getName + { + return "Kinopoisk"; + } + + sub getAuthor + { + return 'Nazarov Pavel'; + } + + sub getLang + { + return 'RU'; + } + + sub getCharset + { + my $self = shift; + return "windows-1251"; + } + + sub getSearchCharset + { + my $self = shift; + return "windows-1251"; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + return "http://www.kinopoisk.ru/index.php?kp_query=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url =~ /^http:/; + return "http://www.kinopoisk.ru/" . 
$url; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + $html =~ s/…/\.\.\./g; + $html =~ s/\x92/'/g; + $html =~ s/\x93/“/g; + $html =~ s/\x94/”/g; + $html =~ s/—/—/g; + $html =~ s/""/'"/g; + $html =~ s/""/"'/g; + $html =~ s|</a></b><br>|</a><br>|; + $html =~ s/<br><br>/\x0A/g; + return $html; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCMediadis.pm b/lib/gcstar/GCPlugins/GCfilms/GCMediadis.pm new file mode 100644 index 0000000..4caf406 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCMediadis.pm @@ -0,0 +1,316 @@ +package GCPlugins::GCfilms::GCMediadis; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginMediadis;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for each opening HTML tag.
    # $tagname is the tag name, $attr a hash reference of its attributes.
    # On a search results page it registers a new item for every film
    # detail link and arms the year / cast state flags; on an item page it
    # picks up the poster URL and arms the synopsis / title flags.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                if (   ($attr->{href} =~ m|http://www\.mediadis\.com/video/detail\.asp|)
                    && ($attr->{class} eq 'a-blue'))
                {
                    # A link to a film page: start a new entry in the list.
                    my $url = $attr->{href};
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
                elsif (($self->{couldBeCast} == 1)
                    && ($attr->{href} =~ m|http://www\.mediadis\.com/products/search\.asp|))
                {
                    # yes, found the magic link. director(s) to follow.
                    $self->{couldBeCast} = 2;
                }
            }
            if (($tagname eq 'td') && ($attr->{class} eq 'search-list'))
            {
                if ($attr->{align} eq 'center')
                {
                    $self->{couldBeYear} = 1;
                }
                if (($attr->{align} eq 'left') && ($attr->{colspan} eq '5'))
                {
                    $self->{couldBeCast} = 1;
                }
            }
        }
        else
        {
            if ($tagname eq "img")
            {
                if ($attr->{src} =~ /^http:\/\/www\.(dvdzone2|mediadis)\.com\/pictures\/big\//)
                {
                    $self->{curInfo}->{image} = $attr->{src};
                }
            }
            elsif ($tagname eq "p")
            {
                $self->{insideSynopsis} = 1;
            }
            elsif ($tagname eq "span")
            {
                if (($attr->{class} eq "detail-title"))
                {
                    $self->{insideName} = 1;
                }
            }
        }
    }

    # end
    # Called for each closing HTML tag.
    # On a search results page the end of a table row ends the cast state.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;

        if ($self->{parsingList})
        {
            if ($tagname eq 'tr')
            {
                $self->{couldBeCast} = 0;
            }
        }
    }

    # text
    # Called for each run of plain text between tags.
    # Runs shorter than two characters are ignored. Depending on the state
    # flags set by start() and by earlier label texts, the text is stored
    # as title, year, director, actors, etc.
    sub text
    {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        if ($self->{parsingList})
        {
            # Remove surrounding whitespace. The previous pattern
            # (^\s*(\S*)\s*$) could only match single-word text, so
            # multi-word titles and names were never trimmed.
            $origtext =~ s/^\s+|\s+$//g;

            if ($self->{isMovie})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $self->capWord($origtext);
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
                return;
            }
            elsif ($self->{couldBeYear})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1
                  if $origtext =~ m/([0-9]{4})/;
                $self->{couldBeYear} = 0;
            }
            elsif ($self->{couldBeCast} == 2)    # waiting for director name
            {
                if ($origtext eq "-")
                {
                    $self->{couldBeCast} = 3;    # read actors now
                }
                elsif (!$self->{itemsList}[ $self->{itemIdx} ]->{director})
                {
                    # revert the name transposition done by mediadis
                    # ("Last First" -> "First Last"):
                    $origtext =~ s/^(.*) (\S+)$/$2 $1/;
                    # only one entry, no list.
                    $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext;
                }
            }
            elsif ($self->{couldBeCast} == 3)    # waiting for actors names
            {
                if ($origtext)
                {
                    # revert the name transposition done by mediadis:
                    $origtext =~ s/^(.*) (\S+)$/$2 $1/;
                    $self->{itemsList}[ $self->{itemIdx} ]->{actors} .= $origtext;
                }
            }
        }
        else
        {
            $origtext =~ s/ : //g if !$self->{insideSynopsis};
            if ($self->{insideRating})
            {
                # "7,5/10" -> "7.5", then round to the nearest integer.
                $origtext =~ s{(\d+),(\d+)/10}{$1.$2};
                $self->{curInfo}->{ratingpress} = int($origtext + 0.5);
                $self->{insideRating} = 0;
            }
            elsif ($self->{insideGenre})
            {
                $origtext =~ s/ - /,/g;
                # don't scream! Convert all caps to first cap only.
                $self->{curInfo}->{genre} .= ucfirst(lc($origtext));
                $self->{insideGenre} = 0;
            }
            elsif ($self->{insideDate})
            {
                $self->{curInfo}->{date} = $origtext;
                $self->{insideDate} = 0;
            }
            elsif ($self->{insideDirector})
            {
                if (!$self->{curInfo}->{director})
                {
                    my @directors = split(/\s+-\s+/, $origtext);
                    for my $director (@directors)
                    {
                        # revert the name transposition done by mediadis:
                        $director =~ s/^(.*) (\S+)$/$2 $1/;
                    }
                    $self->{curInfo}->{director} = join(', ', @directors);
                }
                $self->{insideDirector} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                $self->{curInfo}->{synopsis} .= $origtext . "\n\n";
                $self->{insideSynopsis} = 0;
            }
            elsif ($self->{insideNat})
            {
                $self->{curInfo}->{country} = $origtext;
                $self->{insideNat} = 0;
            }
            elsif ($self->{insideTime})
            {
                $self->{curInfo}->{time} = $origtext;
                $self->{insideTime} = 0;
            }
            elsif ($self->{insideActors})
            {
                foreach my $name (split(/\s+-\s+/, $origtext))
                {
                    # revert the name transposition done by mediadis:
                    # move the first name part back in front.
                    $name =~ s/^(.*) (\S+)$/$2 $1/;
                    # and store the actors in a proper list.
                    push @{ $self->{curInfo}->{actors} }, [$name]
                      if ($self->{actorsCounter} <
                        $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
                    $self->{actorsCounter}++;
                }
                $self->{insideActors} = 0;
            }
            elsif ($self->{insideOrig})
            {
                $self->{curInfo}->{original} = $self->capWord($origtext)
                  if !$self->{curInfo}->{original};
                $self->{insideOrig} = 0;
            }
            elsif (($self->{inside}->{span}) && ($self->{insideName}))
            {
                $self->{curInfo}->{title} = $self->capWord($origtext)
                  if !$self->{curInfo}->{title};
            }
            elsif ($self->{inside}->{strong})
            {
                # A <strong> label announces which field the next text holds.
                $self->{insideDate}     = 1 if $origtext =~ m/Year/;
                $self->{insideDirector} = 1 if $origtext =~ m/Director\(s\)/;
                $self->{insideGenre}    = 1 if $origtext =~ m/Genres/;
                $self->{insideOrig}     = 1 if $origtext =~ m/Original title/;
                $self->{insideTime}     = 1 if $origtext =~ m/Duration/;
                $self->{insideNat}      = 1 if $origtext =~ m/Country/;
                $self->{insideActors}   = 1
                  if $origtext =~ m/Actors/
                  or $origtext =~ m/Voice of/;
            }
            if ($self->{inside}->{td})
            {
                if ($origtext =~ m/Global rating/)
                {
                    $self->{insideRating} = 1;
                }
            }
        }
    }

    # Constructor
    # Builds the plugin on top of the base class and declares which
    # columns are shown in the search results.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 0,    # hide the date as it is wrong most of the time
            director => 1,
            actors   => 1
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Called before a page is parsed. Replaces "Filmography" links by
    # their text and turns non-breaking-space entities into plain spaces.
    # $html is the page content; the modified content is returned.
    sub preProcess
    {
        my ($self, $html) = @_;

        $html =~ s|<a (class="underline" )?href="http://www\.mediadis\.com/products/search\.asp\?par=[0-9]*" title="Filmography">([^<]*)</a>|$2|g;
        # NOTE(review): the pattern was unreadable in the reviewed copy
        # (it showed as a space replaced by a space); restored as the
        # &nbsp; entity - confirm against the upstream file.
        $html =~ s/&nbsp;/ /g;

        return $html;
    }

    # getSearchUrl
    # Returns the URL used to search for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.mediadis.com/video/search.asp?t=19&pl=all&kw=$word";
    }

    # getItemUrl
    # Returns $url unchanged, or the site's video section when $url is empty.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return $url unless $url eq '';
        return 'http://www.mediadis.com/video/';
    }

    # getName
    # Returns the plugin name.
    sub getName
    {
        return 'Mediadis';
    }

    # getAuthor
    # Returns the plugin author name.
    sub getAuthor
    {
        return 'Tian';
    }

    # getLang
    # Returns the language code of the site.
    sub getLang
    {
        return 'EN';
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCMetropoliES.pm b/lib/gcstar/GCPlugins/GCfilms/GCMetropoliES.pm
new file mode 100644
index 0000000..f628a33
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCMetropoliES.pm
@@ -0,0 +1,382 @@
package GCPlugins::GCfilms::GCMetropoliES;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginMetropoliES;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # preProcess
    # Called before each page is processed. You can use it to do some substitutions.
    # $html is the page content.
    # Returns modified version of page content.
sub preProcess
{
    my ($self, $html) = @_;

    # Trim a search results page down to the table that holds the results.
    # With /s the page is matched as a single string and the whole of it
    # is replaced by the captured table. The indentation in front of
    # </tr> is matched loosely because its exact width is not reliable.
    $html =~ s/^.*(<table width="100%" border="0" cellspacing="0" cellpadding="5">.*<\/td>\n[ \t]*<\/tr>\n<\/table>)\n\n\n.*$/$1/gs;

    # Trim a film detail page in the same way; there the table of interest
    # is directly followed by another <table>.
    $html =~ s/^.*(<table width="100%" border="0" cellspacing="0" cellpadding="5">.*<\/td>\n[ \t]*<\/tr>\n<\/table>)\n<table.*$/$1/gs;
    return $html;
}

# text
# Called each time some plain text (between tags) is processed.
# $origtext is the read text. Texts shorter than two characters are ignored.
sub text
{
    my ($self, $origtext) = @_;

    return if length($origtext) < 2;

    # Search results: the cells of a row are read in the order
    # date, title, original title, director. For each of them the state
    # value 2 means "the current text belongs to this field"; once it is
    # consumed the next field is armed with 1 (see start).
    if ($self->{parsingList})
    {
        if ($self->{isDate} == 2)
        {
            $self->{isDate} = 0;
            $self->{itemsList}[ $self->{itemIdx} ]->{date} = $origtext;
            $self->{isTitle} = 1;
            return;
        }

        if ($self->{isTitle} == 2)
        {
            $self->{isTitle} = 0;
            $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
            $self->{isOrigTit} = 1;
            return;
        }

        if ($self->{isOrigTit} == 2)
        {
            # The original title is skipped, not stored.
            $self->{isOrigTit}  = 0;
            $self->{isDirector} = 1;
            return;
        }

        if ($self->{isDirector} == 2)
        {
            $self->{isDirector} = 0;
            $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext;
            # Row finished. (This used to reset a misspelled key,
            # "insedeInfos", leaving insideInfos set.)
            $self->{insideInfos} = 0;
            return;
        }
        return;
    }

    # Film detail page.
    else
    {
        $origtext =~ s/\s{2,}//g;

        #$origtext =~ s/\n//g if !$self->{insideSynopsis};
        if ($self->{insideName})
        {
            # "Title (2008)" -> title and year.
            if ($origtext =~ /([^\(]*) \(([0-9]{4})\)/)
            {
                $self->{curInfo}->{title} = $1;
                $self->{curInfo}->{date}  = $2;
            }
            $self->{insideName} = 0;
        }
        if ($self->{inside}->{td})
        {
            # "Original title, Country, NN Min."
            if ($origtext =~ /(.*), (.*), (.*) Min\./)
            {
                $self->{curInfo}->{original} = $1;
                $self->{curInfo}->{country}  = $2;
                $self->{curInfo}->{time}     = $3;
            }
            elsif ($self->{insideActors})
            {
                # insideActors counts down from 2 (set when the cast label
                # is seen); the text reached at 0 is stored as the actors.
                $self->{insideActors}--;
                if ($self->{insideActors} == 0)
                {
                    $self->{curInfo}->{actors} = $origtext;
                }
            }
        }
        if ($self->{insideDirector})
        {
            $self->{insideDirector} = 0;
            $self->{curInfo}->{director} = $origtext;
        }

        if ($self->{inside}->{span})
        {
            if ($origtext =~ /Int.rpretes:/)
            {
                $self->{insideActors} = 2;
            }
        }
        if ($self->{insideSynopsis})
        {
            $self->{curInfo}->{synopsis} = $origtext;
            $self->{insideSynopsis}      = 0;
            $self->{insideInfos}         = 0;
        }
    }
}

# end
# Called each time a HTML tag ends.
# $tagname is the tag name.
sub end
{
    my ($self, $tagname) = @_;
    $self->{inside}->{$tagname}--;

    # Code to process the search results
    #if ($self->{parsingList}){
    #}
    # Code to process the information of the selected film
    #else {
    #}
}

# In processing functions below, self->{parsingList} can be used.
# If true, we are processing a search results page
# If false, we are processing a item information page.

# $self->{inside}->{tagname} (with correct value for tagname) can be used to test
# if we are in the corresponding tag.

# You have a counter $self->{itemIdx} that has to be used when processing search results.
# It is your responsibility to increment it!

# When processing search results, you have to fill the available fields for results
#
#  $self->{itemsList}[$self->{itemIdx}]->{field_name}
#
# When processing a movie page, you need to fill the fields (if available)
# in $self->{curInfo}.
#
#  $self->{curInfo}->{field_name}

# start
# Called each time a new HTML tag begins.
# $tagname is the tag name.
# $attr is reference to an associative array of tag attributes.
# $attrseq is an array reference containing all the attributes name.
# $origtext is the tag text as found in source file
# Returns nothing
sub start
{
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
    $self->{inside}->{$tagname}++;

    # Search results page: build the list of items.
    if ($self->{parsingList})
    {
        # Are we entering a <tr> that holds the data of one title?
        if (($tagname eq "tr") && (($attr->{bgcolor} eq "#ECF5FF") || ($attr->{bgcolor} eq "#FFFFFF")))
        {
            $self->{insideInfos} = 1;
            # The date is the first cell to read; mark it as the next one.
            $self->{isDate}     = 1;
            $self->{isTitle}    = 0;
            $self->{isOrigTit}  = 0;
            $self->{isDirector} = 0;
            # One more result found.
            $self->{itemIdx}++;
            return;
        }

        # Work out which field the cell we are entering belongs to:
        # a field marked "next" (1) becomes "being read" (2).
        if ($tagname eq "td" && $self->{insideInfos})
        {
            $self->{isDate}     = 2 if $self->{isDate} == 1;
            $self->{isOrigTit}  = 2 if $self->{isOrigTit} == 1;
            $self->{isDirector} = 2 if $self->{isDirector} == 1;
        }
        if ($tagname eq "a" && $self->{isTitle})
        {
            $self->{isTitle} = 2;
            # Keep the URL of the link.
            my $url = $attr->{href};
            $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
        }
    }
    # Film detail page: collect the information of the selected film.
    else
    {
        # First matching image wins: only look for a poster while none has
        # been stored. (Logical &&; this used to be the bitwise & operator.)
        if (($tagname eq "img") && !$self->{curInfo}->{image})
        {
            # Images on the site:
            # Thumb  http://carteles.metropoliglobal.com/galerias/data/1149/1563-2008-rastrooculto-espanol-210459-thumb.jpg
            # Normal http://carteles.metropoliglobal.com/galerias/data/1149/1563-2008-rastrooculto-espanol-210459.jpg
            # The full-size address is derived from the thumbnail's.
            if ($attr->{src} =~ /\.\.\/(galerias\/data\/[0-9]*\/.*)-thumb\.jpg/)
            {
                $self->{curInfo}->{image} = "http://carteles.metropoliglobal.com/" . $1 . ".jpg";
            }
        }

        # Rating
        if ($tagname eq "img")
        {
            # The score is shown as an image named ratingX.gif where
            # X is between 0 and 5; it is rescaled to 0..10.
            if ($attr->{src} =~ /imagenes\/rating([0-5])\.gif/)
            {
                $self->{curInfo}->{ratingpress} = ($1 / 5) * 10;
            }
        }
        elsif ($tagname eq "span")
        {
            $self->{insideName}  = 1 if $attr->{class} eq "title";
            $self->{insideInfos} = 1 if $attr->{class} eq "title";
        }
        elsif ($tagname eq "td")
        {
            $self->{insideDirector} = 1 if $attr->{width} eq "84%";
            if ($self->{insideInfos})
            {
                $self->{insideSynopsis} = 1 if $attr->{colspan} eq "2";
            }
        }
    }
}

# changeUrl
# Can be used to change URL if item URL and the one used to
# extract information are different.
# Return the modified URL.
#sub changeUrl
#{
#    my ($self, $url) = @_;
#    return $url;
#}

# getExtra
# Used if the plugin wants an extra column to be displayed in search results
# Return the column title or empty string to hide the column.
#sub getExtra
#{
#    return 'Extra';
#}

# getLang
# Used to fill in plugin list with user language plugins
# Return the language used for this site (2 letters code).
sub getLang
{
    return "ES";
}

# getAuthor
# Used to display the plugin author in GUI.
# Returns the plugin author name.
sub getAuthor
{
    return "DoVerMan";
}

# getName
# Used to display plugin name in GUI.
# Returns the plugin name.
sub getName
{
    return 'CartelesMetropoliGlobal';
}

# getCharset
# Used to convert charset in web pages.
# Returns the charset as specified in pages.
sub getCharset
{
    my $self = shift;
    # Charset of the web site
    return "iso-8859-1";
}

# getItemUrl
# Used to get the full URL of an item page.
# Useful when url on results pages are relative.
# $url is the URL as found with a search.
# Returns the absolute URL.
sub getItemUrl
{
    my ($self, $url) = @_;
    # $url is expected to look like ficha.php?......

    return "http://carteles.metropoliglobal.com/paginas/$url";
}

# getSearchUrl
# Used to get the URL that to be used to perform searches.
# $word is the query
# Returns the full URL.
sub getSearchUrl
{
    my ($self, $word) = @_;
    return "http://carteles.metropoliglobal.com/paginas/ficha.php"
      . "?qbtitulo=$word&qbbuscar=titulo&Submit=Buscar&qsec=buscar";
}

# Constructor
sub new
{
    # Initialisation
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();
    bless($self, $class);

    # Fields returned by the plugin (1 yes, 0 no). These are the ones
    # shown in the list of results.
    $self->{hasField} = {
        title    => 1,
        date     => 1,
        director => 1,
        actors   => 0,
    };

    # Tells whether we are currently inside useful information
    $self->{insideInfos} = 0;

    # Parsing state of the results list, per field
    # (0 do not process, 1 is the next one, 2 being processed)
    $self->{isDate}     = 0;
    $self->{isTitle}    = 0;
    $self->{isOrigTit}  = 0;
    $self->{isDirector} = 0;

    $self->{curName} = undef;
    $self->{curUrl}  = undef;

    return $self;
}

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCMonsieurCinema.pm b/lib/gcstar/GCPlugins/GCfilms/GCMonsieurCinema.pm
new file mode 100644
index 0000000..1e989c2
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCMonsieurCinema.pm
@@ -0,0 +1,272 @@
package GCPlugins::GCfilms::GCMonsieurCinema;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#  See the GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

###################################
#                                 #
#      Plugin soumis par MeV      #
#                                 #
###################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginMonsieurCinema;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for each opening HTML tag.
    # $tagname is the tag name, $attr a hash reference of its attributes.
    # On a search results page every film page link starts a new item; on
    # a film page the tag and its class decide which field the following
    # text belongs to.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                if ($attr->{href} =~ /^http\:\/\/cinema\.tiscali\.fr\/fichefilm\.aspx/)
                {
                    my $url = $attr->{href};
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
            }
        }
        else
        {
            if ($tagname eq "img")
            {
                if ($attr->{src} =~
                    m|^http\://media\.monsieurcinema\.com/film/[0-9]*/[0-9]*/[0-9]*\.jpg|)
                {
                    $self->{curInfo}->{image} = $attr->{src};
                }
            }
            elsif ($tagname eq "b")
            {
                if ($attr->{class} eq "sous_titre")
                {
                    $self->{insideName} = 1;
                }
            }
            elsif ($tagname eq "span")
            {
                if ($attr->{class} eq "sous_titre")
                {
                    $self->{insideDate} = 1;
                }
            }
            elsif ($tagname eq "div")
            {
                # "movie_infos" blocks are produced by preProcess.
                if ($attr->{class} eq "movie_infos")
                {
                    $self->{insideInfos} = 1;
                }
                elsif ($attr->{align} eq "justify")
                {
                    $self->{insideSynopsis} = 1;
                }
            }
        }
    }

    # end
    # Called for each closing HTML tag; only maintains the nesting counters.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # text
    # Called for each run of plain text between tags.
    # Runs shorter than two characters are ignored.
    sub text
    {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext;
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
                return;
            }
            # ", de <director> (<year>)".
            # NOTE(review): the character between the name and the opening
            # parenthesis was an undecodable byte in the reviewed copy;
            # presumed to be a (non-breaking) space - confirm upstream.
            elsif ($origtext =~ /, de ([^(]*)[\s\x{A0}]\(([0-9]{4})\)/)
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{"director"} = $1;
                $self->{itemsList}[ $self->{itemIdx} ]->{"date"}     = $2;
            }
        }
        else
        {
            $origtext =~ s/\s{2,}//g;
            # [endline] markers come from preProcess (they stand for <br>).
            $origtext =~ s/\[endline\]/\n/g if !$self->{insideSynopsis} && !$self->{insideCast};

            if ($self->{insideName})
            {
                $self->{curInfo}->{title} = $self->capWord($origtext);
                $self->{insideName} = 0;
            }
            elsif ($self->{insideDate})
            {
                if ($origtext =~ /\(([0-9]{4})\)/)
                {
                    $self->{curInfo}->{date} = $1;
                    # The cast line is expected after the year.
                    $self->{insideCast} = 1;
                }
                $self->{insideDate} = 0;
            }
            elsif ($self->{insideInfos})
            {
                if (($origtext =~ /Genre\s*\:\s*(.*)/) || ($origtext =~ /Catégorie\s*\:\s*(.*)/))
                {
                    $self->{curInfo}->{genre} .= $self->{curInfo}->{genre} ? "," . $1 : $1;
                    $self->{curInfo}->{genre} =~ s/, /,/g;
                }
                elsif ($origtext =~ /Durée\s*\:\s*(.*)/)
                {
                    $self->{curInfo}->{time} = $1;
                }
                elsif ($origtext =~ /Pays\s*\:\s*(.*)/)
                {
                    $self->{curInfo}->{country} = $1;
                }
                elsif ($origtext =~ /Public\s*\:\s*(.*)/)
                {
                    if ($1 eq 'Tous publics')
                    {
                        $self->{curInfo}->{age} = 2;
                    }
                    else
                    {
                        # Keep only the first number found in the text.
                        $self->{curInfo}->{age} = $1;
                        $self->{curInfo}->{age} =~ s/.*?([0-9]+).*/$1/;
                    }
                }
                $self->{insideInfos} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                $origtext =~ s/\[endline\]/\n/g;
                $self->{curInfo}->{synopsis} = $origtext if !$self->{curInfo}->{synopsis};
                $self->{insideSynopsis} = 0;
            }
            elsif ($self->{insideCast})
            {
                $origtext =~ s/\[endline\]//g;
                # Drop line breaks. (Written with escapes; the reviewed copy
                # had raw control characters inside the pattern.)
                $origtext =~ s/\r|\n//g;
                if ($origtext =~ /de(.*)avec(.*)/)
                {
                    $self->{curInfo}->{director} = $1;
                    $self->{curInfo}->{actors}   = $2;
                }
                $self->{insideCast} = 0;
            }
        }
    }

    # Constructor
    # Builds the plugin on top of the base class and declares which
    # columns are shown in the search results.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 0,
            director => 0,
            actors   => 0,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Called before a page is parsed. Removes bare <b> tags, replaces <br>
    # by an [endline] marker, unwraps upper-case spans and search links,
    # and merges each label/value pair of floating <div>s into a single
    # <div class="movie_infos">label : value</div>.
    # $html is the page content; the modified content is returned.
    sub preProcess
    {
        my ($self, $html) = @_;

        $html =~ s{</?b>}{}g;
        $html =~ s/<br>/\[endline\]/gi;
        $html =~ s{<span style="text-transform\:uppercase;">([^<]*)</span>}
                  {$1}g;
        $html =~ s{<div style="float\:left;width\:100px">([^<]*)</div>[^<]*<div style="float\:left;">([^<]*)</div>}
                  {<div class="movie_infos">$1 \: $2</div>}g;
        $html =~ s{<a href="http\://cinema\.tiscali\.fr/recherche\.aspx\?file=http&keys=[^"]*">([^<]*)</a>}
                  {$1}g;

        return $html;
    }

    # getSearchUrl
    # Returns the URL used to search for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://cinema.tiscali.fr/recherche.aspx?file=http&keys=$word";
    }

    # getItemUrl
    # Returns $url unchanged, or the site root when $url is empty.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return $url unless $url eq '';
        return "http://cinema.tiscali.fr/";
    }

    # getName
    # Returns the plugin name.
    sub getName
    {
        return "MonsieurCinema.com";
    }

    # getAuthor
    # Returns the plugin author name.
    sub getAuthor
    {
        return 'MeV';
    }

    # getLang
    # Returns the language code of the site.
    sub getLang
    {
        return 'FR';
    }

    # getCharset
    # Returns the charset of the pages.
    sub getCharset
    {
        return "utf8";
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCMovieMeter.pm b/lib/gcstar/GCPlugins/GCfilms/GCMovieMeter.pm
new file mode 100644
index 0000000..f325817
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCMovieMeter.pm
@@ -0,0 +1,429 @@
package GCPlugins::GCfilms::GCMovieMeter;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#  Copyright 2007 Petr Gajdusek (Pajdus) <gajdusek.petr@centrum.cz>
#  Copyright 2007 Mattias de Hollander (MaTiZ) <mdehollander@gmail.com>
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    # Replace SiteTemplate with your exporter name
    # It must be the same name as the one used for file and main package name
    package GCPlugins::GCfilms::GCPluginMovieMeter;

    use HTTP::Cookies;
    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # getSearchUrl
    # Used to get the URL that to be used to perform searches.
    # $word is the query
    # Returns the full URL.
    #
    # The query is first submitted with a POST through $self->{ua}; the
    # URL returned is always the fixed results page. The POST response is
    # not inspected (best effort, as before).
    # NOTE(review): presumably the site remembers the posted query for the
    # following request (session/cookie) - confirm.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        $self->{ua}->post("http://www.moviemeter.nl/film/search", [ 'search[title]' => $word ]);

        return "http://www.moviemeter.nl/film/searchresults/";
    }

    # getItemUrl
    # Used to get the full URL of a movie page.
    # Useful when url on results pages are relative.
    # $url is the URL as found with a search.
    # Returns $url when it is set, the site root otherwise.
    sub getItemUrl
    {
        my ($self, $url) = @_;
        return $url if $url;
        return 'http://www.moviemeter.nl';
    }

    # getCharset
    # Used to convert charset in web pages.
    # Returns the charset as specified in pages.
sub getCharset {
    my $self = shift;

    #return "WINDOWS-1250";
    return "ISO-8859-1";
}

# getName
# Plugin name shown in the GUI.
sub getName { return "MovieMeter.nl" }

# getAuthor
# Plugin author shown in the GUI.
sub getAuthor { return 'MaTiZ' }

# getLang
# Two-letter code of the language used by the site; used to fill the
# plugin list with user-language plugins.
sub getLang { return 'NL' }

# hasSearchYear
# 1 shows the year column in search results, 0 hides it.
sub hasSearchYear { return 1 }

# hasSearchDirector
# 1 shows the director column in search results, 0 hides it.
sub hasSearchDirector { return 0 }

# hasSearchActors
# 1 shows the actors column in search results, 0 hides it.
sub hasSearchActors { return 0 }

# getExtra
# Title of an extra search-results column; an empty string would hide it.
sub getExtra {
    #return '';
    return 'Original Title';
}

# changeUrl
# Hook for plugins whose information URL differs from the movie URL.
# This plugin hands the URL back untouched.
sub changeUrl {
    my ($self, $url) = @_;
    return $url;
}

# start
# Called each time a new HTML tag begins.
# $tagname is the tag name.
# $attr is reference to an associative array of tag attributes.
# $attrseq is an array reference containing all the attributes name.
# $origtext is the tag text as found in source file
# Returns nothing
sub start {
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

    # Per-tag nesting counter.
    $self->{inside}->{$tagname}++;

    if (!$self->{parsingList}) {
        # Movie information page.
        if ($tagname eq "h1") {
            $self->{insideName} = 1;
        }
        elsif ($tagname eq "img") {
            $self->{curInfo}->{image} = $attr->{src}
              if $attr->{class} eq "poster";
        }
        elsif ($tagname eq "a") {
            # Director links only count inside the film_info block.
            if ($self->{insideFilmInfo} && $attr->{href} =~ /director/) {
                $self->{insideFilmDir} = 1;
                $self->{filminfo_dir} += 1;
            }
        }
        elsif ($tagname eq "div") {
            my $divId = $attr->{id};
            if ($divId eq "film_info") {
                $self->{insideFilmInfo} = 1;
                $self->{filminfo_id}    = 0;
            }
            elsif ($divId eq "beslistresults") {
                $self->{insideFilmInfo} = 0;
            }
            elsif ($divId eq "film_votes") {
                $self->{insideRating} = 1;
            }
        }
        return;
    }

    # Search results page.
    if ($tagname eq "a") {
        my $href = $attr->{href};
        if ($href =~ m/\/film\/[0-9]+/) {
            # A link to a film page opens a new result entry.
            $self->{isMovie} = 1;
            $self->{isInfo}  = 1;
            $self->{itemIdx}++;
            $self->{itemsList}[ $self->{itemIdx} ]->{url} = $href;
        }
    }
    elsif ($tagname eq "div") {
        $self->{isYear} = 1 if $attr->{class} =~ /filmresults/;
    }
    elsif ($tagname eq "span") {
        $self->{altTitle} = 1 if $attr->{class} =~ /subtext/;
    }
    return;
}

# end
# Called each time a HTML tag ends.
# $tagname is the tag name.
sub end {
    my ($self, $tagname) = @_;
    $self->{inside}->{$tagname}--;

    if ($self->{parsingList}) {
        # Nothing to do on search result pages.
    }
    else {
        if ($tagname eq "div") {
            # Any closing div ends the rating block.
            if ($self->{insideRating}) {
                $self->{insideRating} = 0;
            }
        }
        elsif ($tagname eq "a") {
            if ($self->{insideFilmDir}) {
                $self->{insideFilmDirOUT} = 1;
                $self->{insideFilmDir}    = 0;
            }
        }
    }
}

# text
# Called each time some plain text (between tags) is processed.
# $origtext is the read text; texts shorter than two characters are ignored.
#
# On search result pages it fills title / date / extra of the current
# entry of $self->{itemsList}; on a movie page it fills $self->{curInfo}
# (title, date, original, country, genre, time, director, actors,
# synopsis, ratingpress) depending on the flags set by start().
sub text {
    my ($self, $origtext) = @_;
    return if length($origtext) < 2;

    if ($self->{parsingList}) {
        if ($self->{isMovie}) {
            $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext;
            $self->{isMovie} = 0;
            $self->{isInfo}  = 1;
            return;
        }
        elsif ($self->{isYear}) {
            # Remove brackets from year: from (2002) to 2002
            $origtext =~ s/(\)|\()//g;
            # Remove leading or trailing whitespace
            $origtext =~ s/^\s+|\s+$//g;
            $self->{itemsList}[ $self->{itemIdx} ]->{"date"} = $origtext;
            $self->{isYear} = 0;
        }
        elsif ($self->{altTitle}) {
            # Only swap the titles when the text really carries an
            # alternative title; on a failed match $1 would still hold the
            # capture of an earlier, unrelated match.
            if ($origtext =~ /Alternatieve titel:\s(.*)/) {
                my $alternative = $1;
                $self->{itemsList}[ $self->{itemIdx} ]->{"extra"} =
                  $self->{itemsList}[ $self->{itemIdx} ]->{"title"};
                $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $alternative;
            }
            $self->{altTitle} = 0;
        }
    }
    else {
        if ($self->{insideName}) {
            # First try to use the search results information, otherwise
            # parse the movie information
            my $title = $self->{itemsList}[ $self->{wantedIdx} ]->{"title"};
            if ($title) {
                $self->{curInfo}->{title} = $title;
                $self->{curInfo}->{date}  = $self->{itemsList}[ $self->{wantedIdx} ]->{"date"};
                $self->{curInfo}->{original} =
                  $self->{itemsList}[ $self->{wantedIdx} ]->{"extra"};
            }
            else {
                # Split "Little Miss Sunshine (2006)" into title and year
                my ($parsedTitle, $year) = ($origtext =~ /(\D+)\s\((\d+)\)/);
                $self->{curInfo}->{title} = $parsedTitle;
                $self->{curInfo}->{date}  = $year;
            }
            $self->{insideName} = 0;
        }
        elsif ($self->{insideFilmInfo}) {
            $self->{filminfo_id} += 1;

            # The second text of the block holds country, genre and time,
            # one per line
            if ($self->{filminfo_id} == 2) {
                my @parts = split("\n", $origtext);
                $self->{curInfo}->{country} = $parts[0];
                my $genre = $parts[1];
                # /g: a film can list more than two genres
                $genre =~ s/\s\/\s/,/g;
                $self->{curInfo}->{genre} = $genre;
                my $time = $parts[2];
                $time =~ s/\sminuten//;
                $self->{curInfo}->{time} = $time;
            }
            # Director(s), accumulated as a comma separated string
            elsif ($self->{insideFilmDir}) {
                if (exists $self->{curInfo}->{director}) {
                    $self->{curInfo}->{director} =
                      $self->{curInfo}->{director} . ", " . $origtext;
                }
                else {
                    $self->{curInfo}->{director} = $origtext;
                }
            }

            # "met <actors>" followed by a blank line and the synopsis
            if ($origtext =~ s/\nmet\s//) {
                my @parts = split("\n\n", $origtext);
                $self->{curInfo}->{synopsis} = $parts[1];
                $parts[0] =~ s/ en /, /;
                # The pattern must be a regex: in a double-quoted string
                # "\s" is just "s", which chopped the letter s off names.
                foreach my $actor (split(/\s*,\s*/, $parts[0])) {
                    push @{ $self->{curInfo}->{actors} }, [$actor]
                      if $self->{actorsCounter} <
                      $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS;
                    $self->{actorsCounter}++;
                }
            }
        }
        elsif ($self->{insideRating}) {
            # Use a dot instead of a comma as decimal separator
            $origtext =~ s/,/./;
            # Scale rating to a maximum of 10
            # and round to integer
            $self->{curInfo}->{ratingpress} = int($origtext * 2 + 0.5);
        }
    }
}

# new
# Constructor.
# Installs a fresh cookie jar on the user agent and declares which fields
# the search results provide.
# Returns object reference.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();

    $self->{ua}->cookie_jar(HTTP::Cookies->new);

    bless($self, $class);

    $self->{hasField} = {
        title  => 1,
        date   => 1,
        actors => 0,
    };

    $self->{isInfo}  = 0;
    $self->{isMovie} = 0;

    return $self;
}

# preProcess
# Called before each page is processed. You can use it to do some substitutions.
# $html is the page content.
# Returns modified version of page content.
sub preProcess {
    my ($self, $html) = @_;

    # Every <br> / <p> tag - opening, closing or self-closing, in any
    # letter case - becomes a newline.
    (my $converted = $html) =~ s/\<(\/)?(BR|P)(\s*\/)?\>/\n/mgi;

    return $converted;
}

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCMoviecovers.pm b/lib/gcstar/GCPlugins/GCfilms/GCMoviecovers.pm new file mode 100644 index 0000000..f1a5e6c --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCMoviecovers.pm @@ -0,0 +1,246 @@ +package GCPlugins::GCfilms::GCMoviecovers; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginMoviecovers;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Tag-open handler.
    # Search results: a link to /film/titre_... inside a list item opens a
    # new result entry.
    # Movie page: the first image whose alt text starts with "Recto"
    # provides the cover URL.
    sub start {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if (!$self->{parsingList}) {
            return unless $tagname eq "img";

            my ($cover, $label) = ($attr->{src}, $attr->{alt});
            return if $self->{curInfo}->{image};
            return unless $label =~ /^Recto/;

            # Keep only the file name of the thumbnail and point it at the
            # data host instead.
            $cover =~ s/http\:\/\/www\.moviecovers\.com\/DATA\/thumbs\/films\-[A-Za-z0-9-]+\/(.*)/$1/;
            $self->{curInfo}->{image} = "http://data.moviecovers.com/DATA/zipcache/" . $cover;
            return;
        }

        return unless $tagname eq "a";
        return unless ($attr->{href} =~ /^\/film\/titre_/) && ($self->{inside}->{li});

        $self->{isMovie} = 1;
        $self->{isInfo}  = 0;
        $self->{itemIdx}++;
        $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href};
        return;
    }

    # Tag-close handler: only maintains the nesting counters.
    sub end {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # Text handler.
    # Search results: the text after a movie link is its title; a text of
    # the form " (YYYY)" inside a list item is its year.
    # Movie page: table headings (th) arm a flag, the following table cell
    # (td) text is stored in the matching field of $self->{curInfo}.
    sub text {
        my ($self, $origtext) = @_;

        if ($self->{parsingList}) {
            if ($self->{isMovie}) {
                $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext;
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
                return;
            }

            if ($self->{inside}->{li} && $origtext =~ /^ \([0-9]{4}\)/) {
                $origtext =~ s/ \(([0-9]{4})\)/$1/;
                $self->{isInfo} = 0;
                $self->{itemsList}[ $self->{itemIdx} ]->{"date"} = $origtext;
            }
            return;
        }

        if ($self->{inside}->{title}) {
            $self->{curInfo}->{title} = $origtext if length($origtext) > 2;
        }

        if ($self->{inside}->{td}) {
            my $inLink = $self->{inside}->{a};

            if ($self->{insideOriginal}) {
                $origtext =~ s/^\s+//;
                $origtext =~ s/\s+$//;
                $self->{curInfo}->{original} = $origtext;
                $self->{insideOriginal} = 0;
            }
            elsif ($self->{insideGenre} && $inLink) {
                $self->{curInfo}->{genre} = $origtext;
                $self->{insideGenre} = 0;
            }
            elsif ($self->{insideDirector} && $inLink) {
                $self->{curInfo}->{director} = $origtext;
                $self->{insideDirector} = 0;
            }
            elsif ($self->{insideNat} && $inLink) {
                $self->{curInfo}->{country} = $origtext;
                $self->{insideNat} = 0;
            }
            elsif ($self->{insideTime}) {
                $origtext =~ s/^\s+//;
                $origtext =~ s/\s+$//;
                $self->{curInfo}->{time} = $origtext;
                $self->{insideTime} = 0;
            }
            elsif ($self->{insideDate} && $inLink) {
                $self->{curInfo}->{date} = $origtext;
                $self->{insideDate} = 0;
            }
            elsif ($self->{insideActors} && $inLink) {
                # The counter keeps running past the limit; only the
                # append is capped.
                if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS) {
                    $self->{curInfo}->{actors} .= $origtext . ', ';
                }
                $self->{actorsCounter}++;
            }
            elsif ($self->{insideSynopsis}) {
                ($self->{curInfo}->{synopsis} .= $origtext) =~ s/^\s*//;
            }
        }

        if ($self->{inside}->{th}) {
            # Heading text => flag armed for the following cell.
            # (insideSynopsis / m/Résumé/ is deliberately left disabled,
            # as in the original.)
            my @headings = (
                [ insideDirector => qr/Réalisateur/ ],
                [ insideActors   => qr/Acteurs principaux/ ],
                [ insideGenre    => qr/Genre/ ],
                [ insideTime     => qr/Durée/ ],
                [ insideNat      => qr/Nationalité/ ],
                [ insideDate     => qr/Année/ ],
                [ insideOriginal => qr/Titre original/ ],
            );
            foreach my $heading (@headings) {
                my ($flag, $pattern) = @$heading;
                $self->{$flag} = 1 if $origtext =~ $pattern;
            }
        }
    }

    # Constructor: declares the fields available in search results and
    # resets the parser state.
    sub new {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 1,
            actors   => 1,
        };

        @{$self}{qw(isInfo isMovie)}   = (0, 0);
        @{$self}{qw(curName curUrl)}   = (undef, undef);

        return bless($self, $class);
    }

    # Page content is passed through unchanged.
    sub preProcess {
        my ($self, $html) = @_;
        return $html;
    }

    # Search URL for the given query.
    sub getSearchUrl {
        my ($self, $word) = @_;
        return "http://www.moviecovers.com/multicrit.html?titre=" . $word;
    }

    # Prefixes the site host to the URL found in the results.
    sub getItemUrl {
        my ($self, $url) = @_;
        return "http://www.moviecovers.com$url";
    }

    sub getName { return "MovieCovers.com" }

    sub getAuthor { return 'Patrick Fratczak' }

    sub getLang { return 'FR' }

    sub getCharset { return "ISO-8859-1" }
}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCNasheKino.pm b/lib/gcstar/GCPlugins/GCfilms/GCNasheKino.pm new file mode 100644 index 0000000..9e093c7 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCNasheKino.pm @@ -0,0 +1,222 @@ +package GCPlugins::GCfilms::GCNasheKino; + +################################################### +# +# Copyright 2005-2009 zserghei +# +# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;
use Encode qw(encode);

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginNasheKino;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Tag-open handler.
    # Search results: an "ab10" link to data.movies?id... opens a new
    # result entry and arms the date lookup.
    # Movie page: an "ab10" link inside <h1> announces the release year.
    sub start {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
        $self->{inside}->{$tagname}++;

        if ($self->{parsingList}) {
            if ($tagname eq "a") {
                my $url = $attr->{href};
                if ($attr->{class} eq "ab10" && $url =~ m/\/data.movies\?id/) {
                    $self->{isMovie} = 1;
                    $self->{isDate}  = 2;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
            }

            # This test also matches the link handled just above, so
            # isDate is already 1 when a movie link has been processed.
            if (   $tagname eq "a"
                && $attr->{class} eq "ab10"
                && $self->{isDate} == 2)
            {
                $self->{isDate} = 1;
            }
        }
        else {
            if (   $tagname eq "a"
                && $attr->{class} eq "ab10"
                && $self->{inside}->{h1})
            {
                $self->{insideDate} = 1;
            }
        }
    }

    # Text handler.
    # Search results: first text after a movie link is the title, the next
    # text of the form "<digits> г" is the year.
    # Movie page: the text is trimmed, then stored in $self->{curInfo}
    # according to the section flags; the flags themselves are armed by the
    # section headings at the end of the handler.
    sub text {
        my ($self, $origtext) = @_;

        if ($self->{parsingList}) {
            if ($self->{isMovie}) {
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                $self->{isMovie} = 0;
                return;
            }
            elsif ($self->{isDate} == 1) {
                if ($origtext =~ m/([0-9]+)\sг/) {
                    $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
                    $self->{isDate} = 0;
                }
            }
        }
        else {
            utf8::decode($origtext);
            $origtext =~ s/^\s+//;
            $origtext =~ s/\s+$//;

            if ($self->{inside}->{h1}) {
                $self->{curInfo}->{title} = $origtext
                  if !$self->{curInfo}->{title};
            }

            if ($self->{insideDate}) {
                if ($origtext =~ m/([0-9]+)\sг/) {
                    $self->{curInfo}->{date} = $1;
                    # Country is derived from the year alone
                    if ($self->{curInfo}->{date} < 1992) {
                        $self->{curInfo}->{country} = "СССР";
                    }
                    else {
                        $self->{curInfo}->{country} = "Россия";
                    }
                    $self->{curInfo}->{audio} = "русский";
                    $self->{insideDate} = 0;
                }
            }

            if ($self->{insideDirector}) {
                $self->{curInfo}->{director} = $origtext;
                $self->{insideDirector} = 0;
            }
            elsif ($self->{insideSynopsis}) {
                if ($origtext =~ m/\S+/) {
                    $self->{curInfo}->{synopsis} = $origtext;
                    $self->{insideSynopsis} = 0;
                }
            }
            elsif ($self->{insideActors}) {
                if ($origtext =~ m/Сценарий:/) {
                    # The heading that follows the cast ends the list; it
                    # must not be recorded as an actor itself.
                    # NOTE(review): assumes the heading arrives as its own
                    # text node - confirm against a live page.
                    $self->{insideActors} = 0;
                }
                elsif ($origtext ne ''
                    && $origtext !~ m/^,/
                    && $self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS)
                {
                    # Separator texts start with a comma and whitespace-only
                    # texts are empty after trimming; both are skipped.
                    $self->{curInfo}->{actors} .= (
                        $self->{curInfo}->{actors}
                        ? ", " . $origtext
                        : $origtext
                    );
                    $self->{actorsCounter}++;
                }
            }

            # Section headings arm the flag used for the following text
            $self->{insideDirector} = 1 if $origtext =~ m/Режиссер\(ы\):/;
            $self->{insideActors}   = 1 if $origtext =~ m/Актер\(ы\):/;
            $self->{insideSynopsis} = 1 if $origtext =~ m/О\sфильме:/;
        }
    }

    # Tag-close handler: only maintains the nesting counters.
    sub end {
        my ($self, $tagname) = @_;
        $self->{inside}->{$tagname}--;
    }

    # Constructor: declares the fields available in search results and
    # resets the parser state.
    sub new {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    sub getName {
        return "NasheKino";
    }

    sub getAuthor {
        return 'zserghei';
    }

    sub getLang {
        return 'RU';
    }

    sub getCharset {
        my $self = shift;
        return "Windows-1251";
    }

    # Search URL for the given query.
    sub getSearchUrl {
        my ($self, $word) = @_;
        return "http://www.nashekino.ru/data.find?t=0&yr=&sval=$word";
    }

    # Prefixes the site root to the URL found in the results.
    sub getItemUrl {
        my ($self, $url) = @_;
        return "http://www.nashekino.ru/" . $url;
    }

    # Clears the parsingEnded flag before each page; the content itself
    # is returned unchanged.
    sub preProcess {
        my ($self, $html) = @_;
        $self->{parsingEnded} = 0;
        return $html;
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCOFDb.pm b/lib/gcstar/GCPlugins/GCfilms/GCOFDb.pm new file mode 100644 index 0000000..511ec4e --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCOFDb.pm @@ -0,0 +1,304 @@ +package GCPlugins::GCfilms::GCOFDb; + +################################################### +# +# Copyright 2005-2010 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version.
+# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginOFDb; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + if ( ($attr->{href} =~ m/view\.php\?page=film&fid=[0-9]*/) + || ($attr->{href} =~ m|^film/[0-9]*|)) + { + $self->{isTitle} = 1; + $self->{isInfo} = 0; + $self->{isOriginal} = 0; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href}; + } + } + elsif ($tagname eq "font") + { + if ($self->{isInfo}) + { + $self->{isOriginal} = 1; + } + } + } + else + { + if ($tagname eq "font") + { + if ($attr->{face} eq "Arial,Helvetica,sans-serif") + { + if ($attr->{size} eq "3") + { + $self->{insideName} = 1; + } + elsif ($attr->{size} eq "2") + { + $self->{insideInfosNames} = 1 if $attr->{class} eq "Normal"; + $self->{insideInfos} = 1 if $attr->{class} eq "Daten"; + } + } + } + elsif ($tagname eq "img") + { + if ($attr->{src} =~ m|img\.ofdb\.de/film/[0-9]+/[0-9]*.jpg|) + { + $self->{curInfo}->{image} = $attr->{src} + if !$self->{curInfo}->{image}; + } + elsif ($attr->{src} eq "images/design3/notenspalte.png") + { + $self->{curInfo}->{ratingpress} = int( $attr->{alt} + 0.5 ) + if ! 
$self->{curInfo}->{ratingpress}; + } + } + elsif ($tagname eq "a") + { + if ($attr->{href} =~ m/view\.php\?page=blaettern&Kat=Land&Text=(.*)/) + { + $self->{insideCountry} = 1; + } + $self->{curInfo}->{date} = $1 + if ($attr->{href} =~ m/view\.php\?page=blaettern&Kat=Jahr&Text=([0-9]{4})/); + } + elsif (($tagname eq "div") && ($attr->{class} eq "synopsis")) + { + $self->{insideSynopsis} = 1; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + if ($tagname eq "tr") + { + $self->{insideDirector} = 0; + $self->{insideActors} = 0; + $self->{insideGenre} = 0; + $self->{insideInfos} = 0; + } + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + return if length($origtext) < 2; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext; + $self->{isTitle} = 0; + $self->{isInfo} = 1; + return; + } + elsif ($self->{isOriginal}) + { + $origtext =~ s{^\s*/\s*}{}; + $self->{itemsList}[ $self->{itemIdx} ]->{original} = $origtext; + $self->{isOriginal} = 0; + return; + } + elsif (($self->{isInfo}) && ($origtext =~ m/\((\d{4})\)/)) + { + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1; + $self->{isInfo} = 0; + } + } + else + { + if ($self->{insideName}) + { + $self->{curInfo}->{title} = $origtext if !$self->{curInfo}->{title}; + $self->{insideName} = 0; + } + elsif ($self->{insideInfosNames}) + { + $self->{insideOrig} = 1 if $origtext =~ m/Originaltitel:/; + $self->{insideDirector} = 1 if $origtext =~ m/Regie:/; + $self->{insideActors} = 1 if $origtext =~ m/Darsteller:/; + $self->{insideGenre} = 1 if $origtext =~ m/Genre\(s\):/; + $self->{insideInfosNames} = 0; + } + elsif ($self->{insideCountry}) + { + $self->{curInfo}->{country} .= ', ' if $self->{curInfo}->{country}; + $self->{curInfo}->{country} .= $origtext; + $self->{insideCountry} = 0; + } + elsif ($self->{insideInfos} && $self->{inside}->{font}) + { + if ($self->{insideOrig}) + { + 
$self->{curInfo}->{original} = $origtext; + $self->{insideOrig} = 0; + $self->{insideInfos} = 0; + } + elsif ($self->{insideDirector}) + { + $self->{curInfo}->{director} .= + $self->{curInfo}->{director} + ? ', ' . $origtext + : $origtext; + } + elsif ($self->{insideActors}) + { + push @{$self->{curInfo}->{actors}}, [$origtext] + if $self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS; + $self->{actorsCounter}++; + } + elsif ($self->{insideGenre}) + { + push @{$self->{curInfo}->{genre}}, [$origtext]; + } + } + elsif ($self->{insideSynopsis}) + { + $origtext =~ m/(http.*?)(\s|$)/; + my $page = $self->loadPage($1, 0, 1); + $page =~ m|<font face="Arial,Helvetica,sans-serif" size="2" class="Blocksatz">.*?</a><br>[^<]*</b>(?:</b>)?<br><br>(.*?)</font>|ms; + $self->{curInfo}->{synopsis} = $1; + $self->{curInfo}->{synopsis} =~ s/<br \/>/\n/gi; + $self->{insideSynopsis} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + original => 1, + }; + + $self->{isInfo} = 0; + $self->{isYear} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|onmouseover="[^"]*"||gms; + } + $html =~ s{<a href="view\.php\?page=liste&Name=[^"]*">([^<]*)</a>} + {$1}g; + $html =~ s{<a href="view\.php\?page=genre&Genre=[^"]*">([^<]*)</a>} + {$1}g; + $html =~ s{<font face="Arial,Helvetica,sans-serif" size="2" class="Blocksatz"><p class="Blocksatz"><b>Inhalt:<\/b>\s?([^<]*)<a href="(view\.php\?page=inhalt&fid=[0-9]*&sid=[0-9]*)">\s?<b>\[mehr\]</b></a></p></font>} + {<div class="synopsis">$1\nhttp://www.ofdb.de/$2</div>}; + $html =~ s{<font face="Arial,Helvetica,sans-serif" size="2" class="Blocksatz"><p\s*class="Blocksatz"><b>Inhalt:</b>\s?([^<]*)<a 
href="(plot/[0-9]*[^"]*)">\s?<b>\[mehr\]</b></a></p></font>} + {<div class="synopsis">$1\nhttp://www.ofdb.de/$2</div>}gm; + $html =~ s{%DF}{ss}; + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + # if $word looks like an EAN, do a EAN search, otherwise title search + my $kat = ($word =~ /^[\dX]{8}[\dX]*$/) ? "EAN" : "Titel"; + + return "http://www.ofdb.de/view.php?page=suchergebnis&Kat=$kat&SText=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + utf8::decode($url); + return 'http://www.ofdb.de/' . $url; + } + + sub getCharset + { + my $self = shift; + + return "ISO-8859-1"; + } + + sub getSearchCharset + { + my $self = shift; + + return "UTF-8"; + } + + sub getName + { + return "OFDb.de"; + } + + sub getAuthor + { + return 'MeV'; + } + + sub getLang + { + return 'DE'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCOdeonHU.pm b/lib/gcstar/GCPlugins/GCfilms/GCOdeonHU.pm new file mode 100644 index 0000000..0bd78c8 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCOdeonHU.pm @@ -0,0 +1,305 @@ +package GCPlugins::GCfilms::GCOdeonHU; + +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +use strict; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginOdeonHU; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + if ($attr->{href} =~ m:(kat.phtml\?id=.*):) + { #? + my $url = '/' . 
$1; + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + } + } + } + else + { + if ($tagname eq "span") + { + $self->{insideTitle} = ($attr->{class} eq "ver11 modB colDD0008"); + } + elsif ($tagname eq "td") + { + $self->{insideFieldName} = ($attr->{class} eq "ver9 col102643"); + $self->{insideFieldValue} = ($attr->{class} eq "ver11 colblack"); + $self->{insidePersonType} = ($attr->{class} eq "ver9 col1D5263 pad5"); + $self->{insideSynopsis} = ($attr->{class} eq "ver11 col102643 pad2"); + $self->{insideRating} = ($attr->{class} eq "text_cat_score"); + + if ($self->{insideSynopsis} + && (length($self->{curInfo}->{synopsis}) > 20)) + { + $self->{insideSynopsis} = 0; + } + } + elsif ($tagname eq "img") + { + #if (! $self->{curInfo}->{image}) { + if ($attr->{src} =~ m:img/album/.*\.jpg$:) + { + my $img = 'http://odeon.hu/'; + $img .= $attr->{src}; + $self->{curInfo}->{image} = $img; + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + if ($tagname eq "a") + { + $self->{isMovie} = 0; + } + if ($tagname eq "td") + { + $self->{insideFieldName} = 0; + $self->{insideFieldValue} = 0; + $self->{insidePersonType} = 0; + $self->{insideSynopsis} = 0; + $self->{insideRating} = 0; + } + } + + sub text + { + my ($self, $origtext) = @_; + + #return if length($origtext) < 2; + + $origtext =~ s/"/"/g; + $origtext =~ s/³/3/g; + $origtext =~ s/&#[0-9]*;//g; + $origtext =~ s/\n//g; + + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + if ($self->{isMovie}) + { + if ($self->{inside}->{b}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{title} .= $origtext; + return; + } + else + { + if ($origtext =~ m/\[(.*),\s+([0-9]+)\]/) + { + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $2; + $self->{itemsList}[ $self->{itemIdx} ]->{original} = $1; + $self->{isMovie} = 0; + } + } + } + } + else + { + if ($self->{insideTitle}) + { + $self->{curInfo}->{title} = 
$origtext; + $self->{insideTitle} = 0; + return; + } + + if ($self->{insideFieldName}) + { + $self->{FieldName} = "original" if $origtext =~ m/^eredeti/; + $self->{FieldName} = "date" if $origtext =~ m/^..?v:/; + $self->{FieldName} = "country" if $origtext =~ m/^nemzet:/; + $self->{FieldName} = "time" if $origtext =~ m/^hossz:/; + $self->{FieldName} = "todo" if $origtext =~ m/^k..?p:/; + $self->{FieldName} = "todo" if $origtext =~ m/^kiad/; + $self->{FieldName} = "todo" if $origtext =~ m/^dial..?gus:/; + $self->{FieldName} = "genre" if $origtext =~ m/^m..?faj:/; + + $self->{insideFieldName} = 0; + return; + } + + if ($self->{insideFieldValue}) + { + my $txt = $origtext; + my $name = $self->{FieldName}; + $txt =~ s/^\s*//; + $txt =~ s/\s*$//; + $txt =~ s/\s+/ /g; + $txt =~ s/\s*perc$// if $name eq "time"; + return + if $txt =~ m/^\s*$/; + + if ($self->{curInfo}->{$name} !~ m/^\s*$/) + { + $self->{curInfo}->{$name} .= "," . $txt; + } + else + { + $self->{curInfo}->{$name} = $txt; + } + + return; + } + + if ($self->{insidePersonType}) + { + if ($self->{inside}->{b}) + { + my $name = 0; + $name = "director" if $origtext =~ m/^Rendez/; + $name = "actors" if $origtext =~ m/^Szerepl/; + if ($name) + { + $self->{PersonType} = $name; + } + else + { + $self->{insidePersonType} = 0; + } + return; + } + elsif ($self->{inside}->{a}) + { + my $name = $self->{PersonType}; + if ($self->{curInfo}->{$name} !~ m/^\s*$/) + { + $self->{curInfo}->{$name} .= "," . 
$origtext; + } + else + { + $self->{curInfo}->{$name} = $origtext; + } + #$self->{curInfo}->{actors} .= $origtext.', ' + #if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS); + #$self->{actorsCounter}++; + } + + return; + } + + if ($self->{insideSynopsis}) + { + my $txt = $origtext; + $txt =~ s/\r/\n/g; + $txt =~ s/^\s+//g; + $txt =~ s/\s+$//g; + $self->{curInfo}->{synopsis} .= $txt; + } + if ($self->{insideRating}) + { + $self->{curInfo}->{ratingpress} = int($origtext + 0.5) + if $origtext =~ /^[0-9.]+$/; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + original => 1, + }; + + $self->{isMovie} = 0; + $self->{insideDescription} = 0; + $self->{insideSynopsis} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + $html =~ s/""/'"/g; + $html =~ s/""/"'/g; + $html =~ s|</a></b><br>|</a><br>|; + + if ($self->{parsingList}) + { + $html =~ s{</?span[^>]*>}{}gi; # remove all <span> tags + } + + $self->{insideDescription} = 0; + $self->{insideSynopsis} = 0; + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://odeon.hu/kat.phtml?". 
+ "search=$word&scat=5&btn_hirlev.x=13&btn_hirlev.y=5"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://www.odeon.hu$url"; + } + + sub getName + { + return "odeon.hu"; + } + + sub getAuthor + { + return 'Anonymous'; + } + + sub getLang + { + return 'HU'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCOnet.pm b/lib/gcstar/GCPlugins/GCfilms/GCOnet.pm new file mode 100644 index 0000000..29eef60 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCOnet.pm @@ -0,0 +1,327 @@ +# +# More information here: http://wiki.gcstar.org/en/websites_plugins +# +# GCcollection should be replaced with the kind of collection your +# plugin deals with. e.g. GCfilms, GCgames, GCbooks,... + +# Replace SiteTemplate with your plugin name. +# The package name must exactly match the file name (.pm) +package GCPlugins::GCcollection::GCOnet; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
# Plugin class for film.onet.pl. Its name must stay in sync with the
# file name and with the main package name.
package GCPlugins::GCfilms::GCPluginOnet;

use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

# getSearchUrl
# Builds the URL used to perform a search.
# $word is the query; it is appended verbatim to the S= parameter.
# Returns the full URL.
sub getSearchUrl {
    my $self  = shift;
    my $query = shift;

    return 'http://film.onet.pl/filmoteka.html?S=' . $query;
}

# getItemUrl
# Turns the (relative) URL found on a results page into an absolute one.
# $url is the URL as found with a search.
# Returns the absolute URL.
sub getItemUrl {
    my $self     = shift;
    my $relative = shift;

    return join '', 'http://film.onet.pl/', $relative;
}

# getCharset
# Returns the charset name declared by the site's pages.
sub getCharset {
    return 'ISO-8859-2';
}

# getName
# Returns the plugin name shown in the GUI.
sub getName {
    return 'Onet';
}

# getAuthor
# Returns the plugin author name shown in the GUI.
sub getAuthor {
    return 'Marek Cendrowicz';
}

# getLang
# Returns the two-letter code of the language used by this site.
sub getLang {
    return 'PL';
}

# getExtra
# Used if the plugin wants an extra column to be displayed in search results.
# Returns the column title, or an empty string to hide the column.
sub getExtra {
    # No extra column for this site.
    return '';
}

# changeUrl
# Hook for sites where the item URL differs from the URL the information
# is extracted from. This plugin uses the same URL for both.
# Returns the (unmodified) URL.
sub changeUrl {
    my (undef, $address) = @_;

    return $address;
}

# In the processing functions below, $self->{parsingList} can be used:
# if true, a search results page is being processed;
# if false, an item information page is being processed.

# $self->{inside}->{tagname} (with the right value for tagname) tells
# whether we are currently inside the corresponding tag.

# The counter $self->{itemIdx} has to be used when processing search results.
# It is the plugin's responsibility to increment it!

# When processing search results, fill the available fields in
#
#  $self->{itemsList}[$self->{itemIdx}]->{field_name}
#
# When processing a movie page, fill the fields (if available) in
#
#  $self->{curInfo}->{field_name}

# start
# Called each time a new HTML tag begins.
# $tagname is the tag name.
# $attr is a reference to an associative array of tag attributes.
# $attrseq is an array reference containing all the attribute names.
# $origtext is the tag text as found in the source file.
# The return value is not meaningful.
sub start {
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
    $self->{inside}->{$tagname}++;

    if ($self->{parsingList}) {
        # Results page: every link inside a <list_title> opens a new entry;
        # the title itself is picked up by the next text() call.
        return unless $self->{inside}->{list_title} && $tagname eq 'a';

        my $idx = ++$self->{itemIdx};
        $self->{itemsList}[$idx]->{url} = $attr->{href};
        $self->{listTitle} = 1;
        return;
    }

    # Item page: raise the flag matching the element we just entered.
    my $class = $attr->{class};
    if ($class eq 'tyw') {
        $self->{itemTitle} = 1;
    }
    elsif ($tagname eq 'div' && $class eq 'a2') {
        $self->{itemDescription} = 1;
    }
    elsif ($class eq 'item_actor') {
        $self->{itemActor} = 1;
    }
    elsif ($tagname eq 'img' && $class eq 'pic') {
        my $alt = $attr->{alt};
        if ($alt eq 'Galeria' || $alt eq 'Plakat') {
            $self->{curInfo}->{image} = "http://film.onet.pl/" . $attr->{src};
        }
    }
    return;
}

# end
# Called each time an HTML tag ends.
# $tagname is the tag name; its nesting counter is decremented.
sub end {
    my $self = shift;
    my $tag  = shift;
    $self->{inside}->{$tag}--;
}

# text
# Called each time some plain text (between tags) is processed.
# $origtext is the text that was read.
sub text {
    my ($self, $origtext) = @_;

    if ($self->{parsingList}) {
        # Results page: title first, then the year found in <list_date>.
        if ($self->{listTitle}) {
            my $entry = $self->{itemsList}[ $self->{itemIdx} ] ||= {};
            $entry->{title} = $origtext;
            $self->{listTitle} = 0;
        }
        elsif ($self->{inside}->{list_date}) {
            my $entry = $self->{itemsList}[ $self->{itemIdx} ] ||= {};
            ($entry->{date}) = ($origtext =~ m/,\s+(\d+)$/);
        }
        return;
    }

    # Item page: the first matching state wins, in this exact order.
    if ($self->{itemTitle}) {
        $self->{curInfo}->{title} = $origtext;
        $self->{itemTitle} = 0;
        return;
    }
    if ($self->{inside}->{item_country}) {
        # One text node carries: original title (country, year) genre/genre
        my $info = $self->{curInfo} ||= {};
        ($info->{original}) = ($origtext =~ m/(.*)\s+\(/);
        ($info->{country}, $info->{date}) = ($origtext =~ m/(\w+),\s+(\d+)\)/);
        $origtext =~ s|/|, |g;
        ($info->{genre}) = ($origtext =~ m/\)(.*)/);
        return;
    }
    if ($self->{inside}->{item_time}) {
        my $info = $self->{curInfo} ||= {};
        ($info->{time}, $info->{age}) = ($origtext =~ m/czas\s+(\d+).*\s+od\s+(\d+)/);
        return;
    }
    if ($self->{inside}->{item_director}) {
        $self->{curInfo}->{director} .= $origtext;
        return;
    }
    if ($self->{itemDescription}) {
        $self->{curInfo}->{synopsis} = $origtext;
        $self->{itemDescription} = 0;
        return;
    }
    if ($self->{itemActor}) {
        # Comma-separate actors after the first one.
        my $info = $self->{curInfo} ||= {};
        my $separator = $info->{actors} ? ", " : '';
        $info->{actors} .= $separator . $origtext;
        $self->{itemActor} = 0;
        return;
    }
    if ($self->{inside}->{item_rating}) {
        # Site rating is out of 5; doubled and rounded to the nearest integer.
        ($self->{curInfo}->{ratingpress}) = int($origtext * 2 + 0.5);
    }
    return;
}

# new
# Constructor.
# Returns the object reference.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();

    # hasField must be a reference to a hash whose keys are the fields that
    # may appear in results, with value 1 or 0 depending on whether the
    # field is returned or not. For the list of keys, check the model file
    # (.gcm) and search for <field> tags in
    # /collection/options/fields/results
    $self->{hasField} = {
        title    => 1,
        date     => 1,
        director => 0,
        actors   => 0,
    };

    # Start with one empty entry at index 0.
    $self->{itemIdx} = 0;
    @{ $self->{itemsList}[0] }{qw(title url)} = ('', '');

    bless($self, $class);
    return $self;
}

# preProcess
# Called before each page is processed; rewrites the raw markup into
# pseudo-tags (<list_title>, <item_country>, ...) that start() and text()
# key on.
# $html is the page content.
# Returns the modified page content.
sub preProcess {
    my ($self, $html) = @_;

    for ($html) {
        s{<B>(.*?)</B>}{$1}gms;

        if ($self->{parsingList}) {
            s{<TD class=a2 width="100%">(.*?)</TD>}{<list_title>$1</list_title>}gms;
            s{<FONT class=a0 color="#993300">(.*?)</FONT>}{<list_date>$1</list_date>}gms;
        }
        else {
            s{<BR>}{}g;
            s{<TD class=a2 valign=top width="100%">(.*?)<}{<item_country>$1</item_country><}gms;
            s{<SPAN class=a1>(.*?)</SPAN>}{<item_time>$1</item_time>}gms;
            s{Re.yseria: (.*?)Scenariusz}{<item_director>$1</item_director>}gms;
            s{Re.yseria: (.*?)wi.cej}{<item_director>$1</item_director>}gms;
            s{a2><A class=u}{a2><A class=item_actor}gms;
            s{Ocena filmu.*([0-9]\.[0-9]+)/5}{<item_rating>$1</item_rating>}gms;
        }
    }
    return $html;
}

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCPortHU.pm b/lib/gcstar/GCPlugins/GCfilms/GCPortHU.pm
new file mode 100644
index 0000000..e460584
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCPortHU.pm
@@ -0,0 +1,343 @@
package GCPlugins::GCfilms::GCPortHU;

# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

use strict;

use GCPlugins::GCfilms::GCfilmsCommon;

{

package GCPlugins::GCfilms::GCPluginPortHU;

use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

# start
# Called for each opening tag. Maintains the per-tag nesting counters and
# the state flags that text() relies on.
# $tagname is the tag name, $attr a hash reference of its attributes.
sub start {
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

    $self->{inside}->{$tagname}++;

    return if $self->{parsingEnded};

    if ($self->{parsingList}) {
        if ($tagname eq "a") {
            if ($attr->{href} =~ m:(/pls/fi/films.film_page.*):) {
                if ($self->{insideBoldText}) {
                    # A film link in bold text starts a new result entry.
                    my $url = $1;
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
                else {
                    $self->{isMovie} = 0;
                    $self->{isInfo}  = 0;
                }
            }
        }
        elsif ($tagname eq "span") {
            # Each "txt" span after a title advances the info stage
            # (1 = original title, 2 = year / running time).
            if ($attr->{class} eq "txt") {
                $self->{isInfo}++
                  if $self->{isInfo};
            }
            $self->{insideBoldText} = ($attr->{class} eq "btxt");
        }
    }
    else {
        if ($tagname eq "img") {
            if (($attr->{class} eq "object_picture")
                && (!$self->{curInfo}->{image}))
            {
                $self->{curInfo}->{image}  = $attr->{src};
                $self->{insideOtherTitles} = 0;
                $self->{insideDescription} = 1;
            }
        }
        elsif ($tagname eq "div") {
            if (($attr->{class} eq "separator")
                && $self->{insideActors})
            {
                $self->{insideActors}   = 0;
                $self->{insideSynopsis} = 1;
            }
            elsif (($attr->{class} eq "object_picture")
                && (!$self->{curInfo}->{image}))
            {
                # Only trust $1 when the match succeeded; after a failed
                # match it still holds the capture of an earlier regex.
                if ($attr->{style} =~ m/url\(([^\)]*)\)/) {
                    $self->{curInfo}->{image} = $1;
                }
                $self->{insideOtherTitles} = 0;
                $self->{insideDescription} = 1;
            }
        }
        elsif ($tagname eq "span") {
            if ($attr->{class} eq "blackbigtitle") {
                $self->{insideTitle} = 1;
            }
            elsif ($attr->{class} eq "btxt") {
                $self->{insideBoldText} = 1;
            }
            else {
                $self->{insideBoldText} = 0;
            }
            $self->{insideNormalText} = ($attr->{class} eq "txt");
        }
    }
    return;
}

# end
# Called for each closing tag; decrements the tag's nesting counter.
sub end {
    my ($self, $tagname) = @_;

    $self->{inside}->{$tagname}--;
}

# text
# Called for each text node. Depending on the flags set by start(), stores
# the text into the current search result or into $self->{curInfo}.
# Text nodes shorter than two characters are ignored.
sub text {
    my ($self, $origtext) = @_;

    return if length($origtext) < 2;

    # Numeric entities: keep the double quote, map superscript three to a
    # plain 3 (both the entity and the literal character), drop the rest.
    $origtext =~ s/&#34;/"/g;
    $origtext =~ s/&#179;|³/3/g;
    $origtext =~ s/&#[0-9]*;//g;
    $origtext =~ s/\n//g;

    return if ($self->{parsingEnded});

    if ($self->{parsingList}) {
        if ($self->{isMovie}) {
            $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
            $self->{isMovie} = 0;
            $self->{isInfo}  = 1;
            return;
        }
        if ($self->{isInfo} == 1) {
            if ($origtext =~ m/\((.*)\)/) {
                $self->{itemsList}[ $self->{itemIdx} ]->{original} = $1;
            }
            $self->{isInfo} = 0
              if $origtext =~ m/^ $/;
        }
        if ($self->{isInfo} == 2) {
            if ($origtext =~ m/([0-9]+)\)/) {
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
            }
            if ($origtext =~ m/([0-9]+)\sperc/) {
                $self->{itemsList}[ $self->{itemIdx} ]->{time} = $1;
            }
            $self->{isInfo} = 0;
        }
    }
    else {
        if ($self->{insideTitle}) {
            $self->{curInfo}->{title}  = $origtext;
            $self->{insideTitle}       = 0;
            $self->{insideOtherTitles} = 1;
            $self->{insideDescription} = 1;
            return;
        }
        if (   $self->{insideOtherTitles}
            && $self->{insideNormalText})
        {
            if ($origtext =~ m/\((.*)\)/) {
                $self->{curInfo}->{original} = $1;
            }
            $self->{insideOtherTitles} = 0;
            return;
        }
        if (   $self->{insideDescription}
            && $self->{insideBoldText})
        {
            if ($origtext =~ m/([0-9]+)\s+perc/) {
                $self->{curInfo}->{time} = $1;
            }
            if ($origtext =~ m/([0-9]+)$/) {
                $self->{curInfo}->{date} = $1;
            }
            # "<n> év" (years): the accented letter is spelled as an escape
            # and matched both as a decoded character and as UTF-8 bytes.
            if ($origtext =~ m/^([0-9]+)\s+(?:\x{E9}|\xC3\xA9)v/) {
                $self->{curInfo}->{age} = $1;
            }
        }

        if ($origtext =~ m/^rendez/) {
            $self->{insideDirector}    = 1;
            $self->{insideOtherTitles} = 0;
            $self->{insideDescription} = 0;
            return;
        }
        if ($self->{insideDirector}) {
            $self->{curInfo}->{director} = $origtext;
            $self->{insideDirector} = 0;
            return;
        }

        if ($origtext =~ m/^szerepl/) {
            $self->{insideActors} = 1;
            return;
        }
        if ($self->{insideActors}) {
            if ($self->{inside}->{a}) {
                push @{$self->{curInfo}->{actors}}, [$origtext]
                  if ($self->{actorsCounter} <
                    $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
                $self->{actorsCounter}++;
                return;
            }
            elsif ($origtext =~ m/\((.*)\)/) {
                # The counter was incremented above, so <= still lets the
                # role of the last accepted actor through. The > 0 guard
                # avoids addressing element -1 before any actor was seen.
                push @{$self->{curInfo}->{actors}->[ $self->{actorsCounter} - 1 ]},
                  $1
                  if ($self->{actorsCounter} > 0)
                  && ($self->{actorsCounter} <=
                    $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
            }
        }

        # Bold headings "Linkek", "Bemutat..." or "Ajánlók" end the useful
        # part of the page.
        if ($origtext =~ m/^(Linkek|Bemutat|Aj(?:\x{E1}|\xC3\xA1)nl(?:\x{F3}|\xC3\xB3)k)/
            && $self->{insideBoldText})
        {
            $self->{parsingEnded}   = 1;
            $self->{insideSynopsis} = 0;
            return;
        }

        if (   $self->{insideSynopsis}
            && $self->{insideNormalText}
            && $self->{inside}->{span}
            && !$self->{inside}->{a})
        {
            # Append, then strip leading whitespace of the whole synopsis.
            ($self->{curInfo}->{synopsis} .= $origtext) =~ s/^\s*//;
        }
    }
    return;
}

# new
# Constructor. Declares which fields search results provide and resets
# the parser state flags.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();
    bless($self, $class);

    $self->{hasField} = {
        title    => 1,
        date     => 1,
        director => 0,
        actors   => 0,
        original => 1,
    };

    $self->{isInfo}            = 0;
    $self->{isMovie}           = 0;
    $self->{insideDescription} = 0;
    $self->{insideSynopsis}    = 0;
    $self->{insideActors}      = 0;
    $self->{curName}           = undef;
    $self->{curUrl}            = undef;

    return $self;
}

# preProcess
# Called before a page is parsed. Resets the per-page state and repairs
# markup that would confuse the parser.
# Returns the modified page content.
sub preProcess {
    my ($self, $html) = @_;

    $self->{parsingEnded} = 0;

    # Attribute values wrapped in entity-encoded quotes ("&#34;...&#34;")
    # are turned into single-quoted values containing real double quotes.
    $html =~ s/"&#34;/'"/g;
    $html =~ s/&#34;"/"'/g;
    $html =~ s|</a></b><br>|</a><br>|;

    $self->{insideDescription} = 0;
    $self->{insideSynopsis}    = 0;
    $self->{insideActors}      = 0;

    return $html;
}

# getSearchUrl
# Returns the search URL for the query $word.
sub getSearchUrl {
    my ($self, $word) = @_;

    my $base_url = 'http://www.port.hu/pls/ci/cinema.film_creator';
    return "$base_url?i_text=$word&i_film_creator=1";
}

# getItemUrl
# Returns the absolute URL for the site-relative $url.
sub getItemUrl {
    my ($self, $url) = @_;

    return "http://www.port.hu$url";
}

# getCharset
# Returns the charset name used for the site's pages.
sub getCharset {
    my $self = shift;

    return "UTF-8";
}

# convertCharset
# Returns $value unchanged.
sub convertCharset {
    my ($self, $value) = @_;
    return $value;
}

sub getName {
    return "port.hu";
}

sub getAuthor {
    return 'Anonymous';
}

sub getLang {
    return 'HU';
}

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCStopklatka.pm b/lib/gcstar/GCPlugins/GCfilms/GCStopklatka.pm
new file mode 100644
index 0000000..8f4290e
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCStopklatka.pm
@@ -0,0 +1,355 @@
#
# More information here: http://wiki.gcstar.org/en/websites_plugins
#
# GCcollection should be replaced with the kind of collection your
# plugin deals with, e.g. GCfilms, GCgames, GCbooks,...

# Replace SiteTemplate with your plugin name.
# The package name must exactly match the file name (.pm)
# NOTE(review): the package below still uses the template's "GCcollection"
# namespace although the file lives under GCfilms - confirm before renaming.
package GCPlugins::GCcollection::GCStopklatka;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
# Replace SiteTemplate with your exporter name
# It must be the same name as the one used for file and main package name
package GCPlugins::GCfilms::GCPluginStopklatka;

use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

# getSearchUrl
# Used to get the URL to be used to perform searches.
# $word is the query
# Returns the full URL.
sub getSearchUrl {
    my $self  = shift;
    my $query = shift;

    return 'http://www.stopklatka.pl/szukaj/szukaj.asp?kategoria=film&szukaj=' . $query;
}

# getItemUrl
# Turns the (relative) URL found on a results page into an absolute one.
# $url is the URL as found with a search.
# Returns the absolute URL.
sub getItemUrl {
    my $self     = shift;
    my $relative = shift;

    return join '', 'http://www.stopklatka.pl/', $relative;
}

# getCharset
# Returns the charset name declared by the site's pages.
sub getCharset {
    return 'ISO-8859-2';
}

# getName
# Returns the plugin name shown in the GUI.
sub getName {
    return 'Stopklatka';
}

# getAuthor
# Returns the plugin author name shown in the GUI.
sub getAuthor {
    return 'Marek Cendrowicz';
}

# getLang
# Returns the two-letter code of the language used by this site.
sub getLang {
    return 'PL';
}

# getExtra
# Used if the plugin wants an extra column to be displayed in search results.
# Returns the column title, or an empty string to hide the column.
sub getExtra {
    return '';
}

# changeUrl
# Hook for sites where the item URL differs from the URL the information
# is extracted from. This plugin uses the same URL for both.
# Returns the (unmodified) URL.
sub changeUrl {
    my (undef, $address) = @_;

    return $address;
}

# In the processing functions below, $self->{parsingList} can be used:
# if true, a search results page is being processed;
# if false, an item information page is being processed.

# $self->{inside}->{tagname} (with the right value for tagname) tells
# whether we are currently inside the corresponding tag.

# The counter $self->{itemIdx} has to be used when processing search results.
# It is the plugin's responsibility to increment it!

# When processing search results, fill the available fields in
#
#  $self->{itemsList}[$self->{itemIdx}]->{field_name}
#
# When processing a movie page, fill the fields (if available) in
#
#  $self->{curInfo}->{field_name}

# start
# Called each time a new HTML tag begins.
# $tagname is the tag name.
# $attr is a reference to an associative array of tag attributes.
# $attrseq is an array reference containing all the attribute names.
# $origtext is the tag text as found in the source file.
# The return value is not meaningful.
sub start {
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
    $self->{inside}->{$tagname}++;

    if ($self->{parsingList}) {
        # Results page: only tags inside a <list_details> record matter.
        return unless $self->{inside}->{list_details};

        if ($tagname eq 'a') {
            my $idx = ++$self->{itemIdx};
            $self->{itemsList}[$idx]->{url} = $attr->{href};
        }

        my $class = $attr->{class};
        $self->{listTitle} = 1 if $class eq 'searchTitle textB';
        $self->{listYear}  = 1 if $class eq 'searchTitle';
        return;
    }

    # Item page: the first matching condition wins, in this exact order.
    my $class = $attr->{class};
    if ($class eq 'wydarzenie_tytul') {
        $self->{pre_itemTitle} = 1;
    }
    elsif ($self->{pre_itemTitle}) {
        # Inside the title area: h1 = title, h2 = original title; the
        # first table closes the area.
        my %flagFor = (h1 => 'itemTitle', h2 => 'itemOriginalTitle');
        if (exists $flagFor{$tagname}) {
            $self->{ $flagFor{$tagname} } = 1;
        }
        elsif ($tagname eq 'table') {
            $self->{pre_itemTitle} = 0;
        }
    }
    elsif ($class eq 'film_pozycja') {
        $self->{pre_itemDetails} = 1;
    }
    elsif ($class eq 'main_sub_table film') {
        $self->{itemMain} = 1;
    }
    elsif ($tagname eq 'img' && $self->{itemMain}) {
        $self->{curInfo}->{image} = $attr->{src};
    }
    return;
}

# end
# Called each time an HTML tag ends.
# $tagname is the tag name; its nesting counter is decremented.
sub end {
    my $self = shift;
    my $tag  = shift;
    $self->{inside}->{$tag}--;
}

# text
# Called each time some plain text (between tags) is processed.
# $origtext is the text that was read.
sub text {
    my ($self, $origtext) = @_;

    if ($self->{parsingList}) {
        # Results page: title, then the year given in parentheses.
        if ($self->{listTitle}) {
            my $entry = $self->{itemsList}[ $self->{itemIdx} ] ||= {};
            $entry->{title} = $origtext;
            $self->{listTitle} = 0;
        }
        elsif ($self->{listYear}) {
            $origtext =~ s/\((.*?)\)/$1/gms;
            my $entry = $self->{itemsList}[ $self->{itemIdx} ] ||= {};
            $entry->{date} = $origtext;
            $self->{listYear} = 0;
        }
        return;
    }

    # Item page: the first matching state wins, in this exact order.
    if ($self->{itemTitle}) {
        $self->{curInfo}->{title} = $origtext;
        $self->{itemTitle} = 0;
        return;
    }
    if ($self->{itemOriginalTitle}) {
        $origtext =~ s/\s*\((.*?)\)/$1/gms;
        $self->{curInfo}->{original} = $origtext;
        $self->{itemOriginalTitle} = 0;
        return;
    }
    if ($self->{itemDirector}) {
        $self->{curInfo}->{director} = $origtext;
        $self->{itemDirector}    = 0;
        $self->{pre_itemDetails} = 0;
        return;
    }
    if ($self->{itemDetails}) {
        # "genre, country, year, NN min" in a single text node.
        my $info = $self->{curInfo} ||= {};
        ($info->{genre}, $info->{country}, $info->{date}, $info->{time}) =
          split(/, /, $origtext);
        $info->{time} =~ s/ min//;
        $self->{itemDetails}     = 0;
        $self->{pre_itemDetails} = 0;
        return;
    }
    if ($self->{itemActors}) {
        $self->{curInfo}->{actors} = $origtext;
        $self->{itemActors}      = 0;
        $self->{pre_itemDetails} = 0;
        return;
    }
    if ($self->{itemMain}) {
        $self->{curInfo}->{synopsis} = $origtext;
        $self->{itemMain} = 0;
        return;
    }
    if ($self->{inside}->{item_rank}) {
        # Rounded to the nearest integer.
        $self->{curInfo}->{ratingpress} = int($origtext + 0.5);
        $self->{pre_itemDetails} = 0;
        return;
    }
    if ($self->{pre_itemDetails}) {
        # A label announces what the next text node contains.
        $self->{itemDirector} = 1 if $origtext eq 'reżyseria:';
        $self->{itemDetails}  = 1 if $origtext eq 'szczegóły:';
        $self->{itemActors}   = 1 if $origtext eq 'obsada:';
    }
    return;
}

# new
# Constructor.
# Returns the object reference.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();

    # hasField must be a reference to a hash whose keys are the fields that
    # may appear in results, with value 1 or 0 depending on whether the
    # field is returned or not. For the list of keys, check the model file
    # (.gcm) and search for <field> tags in
    # /collection/options/fields/results
    $self->{hasField} = {
        title    => 1,
        date     => 1,
        director => 0,
        actors   => 0,
    };

    # Start with one empty entry at index 0.
    $self->{itemIdx} = 0;
    @{ $self->{itemsList}[0] }{qw(title url)} = ('', '');

    bless($self, $class);
    return $self;
}

# preProcess
# Called before each page is processed; rewrites the raw markup into the
# pseudo-tags (<list_details>, <item_rank>) that start() and text() key on
# and strips wrappers that would split text nodes.
# $html is the page content.
# Returns the modified page content.
sub preProcess {
    my ($self, $html) = @_;

    for ($html) {
        if ($self->{parsingList}) {
            s|<!-- record_start -->(.*?)<!-- record_end -->|<list_details>$1</list_details>|gms;
        }
        else {
            s|<nobr>(.*?)</nobr>|$1|gms;
            s|<span class="bold">(.*?)</span>|$1|gms;
            s|<a href="/filmowcy/osoba.*?">(.*?)</a>|$1|gms;
            s|script type="text/javascript">document.write\(getOcena\((.*?)\)\);</script>|<item_rank>$1</item_rank>|gms;
        }
    }
    return $html;
}

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCThemoviedb.pm b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedb.pm
new file mode 100644
index 0000000..f7636cc
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedb.pm
@@ -0,0 +1,337 @@
package GCPlugins::GCfilms::GCthemoviedb;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

package GCPlugins::GCfilms::GCPluginThemoviedb;

use base 'GCPlugins::GCfilms::GCfilmsPluginsBase';
use XML::Simple;

# _apiUrl
# Builds a themoviedb.org API 2.1 URL.
# $method is the API method (e.g. 'Movie.getInfo'); the language comes from
# siteLanguage(), so language-specific subclasses only override that.
# $tail (movie id or search words) is appended verbatim.
sub _apiUrl {
    my ($self, $method, $tail) = @_;

    return 'http://api.themoviedb.org/2.1/' . $method . '/'
      . $self->siteLanguage()
      . '/xml/9fc8c3894a459cac8c75e3284b712dfc/'
      . $tail;
}

# parse
# Entry point called with the downloaded page. The API answers in XML, so
# the page is decoded with XML::Simple instead of being parsed as HTML.
# Pages that start with an HTML doctype are ignored.
sub parse {
    my ($self, $page) = @_;
    return if $page =~ /^<!DOCTYPE html/;

    if ($self->{parsingList}) {
        $self->_parseSearchResults($page);
    }
    else {
        $self->_parseMovie($page);
    }
    return;
}

# _parseSearchResults
# Fills $self->{itemsList} from a Movie.search answer. Every alternative
# name of a movie is added as an additional result with the same URL.
sub _parseSearchResults {
    my ($self, $page) = @_;

    return if $page =~ m/>Nothing found.<\/movie/;

    my $xml = XML::Simple->new->XMLin(
        $page,
        ForceArray => [ 'movie', 'alternative_name' ],
        KeyAttr    => ['id']
    );
    my $movies = $xml->{'movies'}->{'movie'} || {};

    foreach my $id (keys(%{$movies})) {
        my $movie = $movies->{$id};

        # We only want movies, not series and everything else the api returns
        next unless $movie->{'type'} eq "movie";

        my $url = $self->_apiUrl('Movie.getInfo', $id);

        # A missing release date comes back as a reference, not a string.
        my $released = "";
        $released = $movie->{'released'} if !ref($movie->{'released'});

        $self->{itemIdx}++;
        $self->{itemsList}[ $self->{itemIdx} ]->{date}  = $released;
        $self->{itemsList}[ $self->{itemIdx} ]->{url}   = $url;
        $self->{itemsList}[ $self->{itemIdx} ]->{title} = $movie->{'name'};

        for my $alternateName (@{$movie->{alternative_name}}) {
            next if ref($alternateName);
            $self->{itemIdx}++;
            $self->{itemsList}[ $self->{itemIdx} ]->{date}  = $released;
            $self->{itemsList}[ $self->{itemIdx} ]->{url}   = $url;
            $self->{itemsList}[ $self->{itemIdx} ]->{title} = $alternateName;
        }
    }
    return;
}

# _parseMovie
# Fills $self->{curInfo} from a Movie.getInfo answer.
sub _parseMovie {
    my ($self, $page) = @_;

    my $xml = XML::Simple->new->XMLin(
        $page,
        ForceArray => [ 'country', 'person', 'category', 'size', 'alternative_name' ],
        KeyAttr    => ['']
    );
    my $movie = $xml->{movies}->{movie};

    my $wanted = $self->{itemsList}[ $self->{wantedIdx} ]->{title};
    if ($wanted && ($movie->{name} ne $wanted)) {
        # The name returned by tmdb differs from the one the user selected:
        # they chose a translated name, so keep their choice as the title
        # and store tmdb's name as the original (untranslated) title.
        $self->{curInfo}->{title}    = $wanted;
        $self->{curInfo}->{original} = $movie->{name};
    }
    else {
        $self->{curInfo}->{title} = $movie->{name};
    }

    # Now, add any alternate names
    for my $alternateName (@{$movie->{alternative_name}}) {
        next if ref($alternateName);
        next if $alternateName eq $self->{curInfo}->{title};
        $self->{curInfo}->{original} .= ", "
          if $self->{curInfo}->{original};
        $self->{curInfo}->{original} .= $alternateName;
    }

    $self->{curInfo}->{webPage} = $movie->{url};

    # The following fields could be missing from the xml, in which case
    # they come back as references instead of strings.
    $self->{curInfo}->{synopsis} = $movie->{overview}
      if (!ref($movie->{overview}));
    $self->{curInfo}->{ratingpress} = $movie->{rating}
      if (!ref($movie->{rating}));
    $self->{curInfo}->{date} = $movie->{released}
      if (!ref($movie->{released}));
    $self->{curInfo}->{time} = $movie->{runtime} . " mins"
      if (!ref($movie->{runtime}));

    if (!ref($movie->{certification})) {
        # Certification label => minimum age.
        my %ageFor = (
            'Unrated' => 1,
            'Open'    => 1,
            'G'       => 2,
            'Approved' => 2,
            'PG'      => 5,
            'M'       => 5,
            'GP'      => 5,
            'PG-13'   => 13,
            'R'       => 17,
            'NC-17'   => 18,
            'X'       => 18,
        );
        my $certification = $movie->{certification};
        $self->{curInfo}->{age} = $ageFor{$certification}
          if exists $ageFor{$certification};
    }

    # Countries are stored as a list (array reference).
    for my $country (@{$movie->{countries}->{country}}) {
        push @{$self->{curInfo}->{country}}, $country->{name};
    }

    for my $person (@{$movie->{cast}->{person}}) {
        my $name = $person->{name};
        # Strip any blank spaces from start and end of name
        $name =~ s/\s*$//;
        $name =~ s/^\s*//;
        if ($person->{job} eq "Director") {
            $self->{curInfo}->{director} .= $name . ', ';
        }
        elsif ($person->{job} eq "Actor") {
            if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS) {
                push @{$self->{curInfo}->{actors}}, [$name];
                # A missing role may come back as a reference; use an empty
                # role rather than its stringified address.
                my $role = ref($person->{character}) ? '' : $person->{character};
                $role =~ s/\s*$//;
                $role =~ s/^\s*//;
                push @{$self->{curInfo}->{actors}->[ $self->{actorsCounter} ]}, $role;
                $self->{actorsCounter}++;
            }
        }
    }
    $self->{curInfo}->{director} =~ s/, $//;

    for my $category (@{$movie->{categories}->{category}}) {
        push @{$self->{curInfo}->{genre}}, [ $category->{name} ]
          if ($category->{type} eq 'genre');
    }

    for my $image (@{$movie->{images}->{image}}) {
        next unless $image->{type} eq "poster";

        # Fetch either the big original pic, or just the small thumbnail pic
        my $wantedSize = $self->{bigPics} ? "original" : "thumb";
        next unless $image->{size} eq $wantedSize;

        # The first suitable poster wins.
        $self->{curInfo}->{image} = $image->{url}
          if !$self->{curInfo}->{image};
    }
    return;
}

# new
# Constructor. Declares which fields search results provide.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();
    bless($self, $class);

    $self->{hasField} = {
        title    => 1,
        date     => 1,
        director => 0,
        actors   => 0,
    };

    return $self;
}

# getItemUrl
# Returns the API URL for an item.
# Without $url, returns the site address as a hint of what kind of
# addresses this plugin handles. A URL that does not already point at the
# API host is assumed to end with the movie id, which is used to build the
# matching Movie.getInfo URL.
sub getItemUrl {
    my ($self, $url) = @_;

    if (!$url) {
        $url = "http://www.themoviedb.org";
    }
    elsif (index($url, 'api.themoviedb.org') < 0) {
        # Test for the API host rather than the bare substring "api",
        # which also occurs inside ordinary page slugs.
        my $slash = rindex($url, '/');
        if ($slash >= 0) {
            $url = $self->_apiUrl('Movie.getInfo', substr($url, $slash + 1));
        }
    }
    return $url;
}

# preProcess
# Returns the page unchanged.
sub preProcess {
    my ($self, $html) = @_;

    return $html;
}

sub decodeEntitiesWanted {
    return 0;
}

# getSearchUrl
# Returns the Movie.search API URL for the query $word.
sub getSearchUrl {
    my ($self, $word) = @_;
    return $self->_apiUrl('Movie.search', $word);
}

# changeUrl
# Makes sure the URL is for the api, not the main movie page.
sub changeUrl {
    my ($self, $url) = @_;
    return $self->getItemUrl($url);
}

# siteLanguage
# Language code inserted into API URLs; overridden by the per-language
# subclasses.
sub siteLanguage {
    my $self = shift;

    return 'en';
}

sub getName {
    return "The Movie DB";
}

sub getAuthor {
    return 'Zombiepig';
}

sub getLang {
    return 'EN';
}

sub getCharset {
    my $self = shift;

    return "UTF-8";
}

sub getSearchCharset {
    my $self = shift;

    # Need urls to be double character encoded
    return "utf8";
}

# convertCharset
# Returns $value unchanged.
sub convertCharset {
    my ($self, $value) = @_;
    return $value;
}

sub getNotConverted {
    my $self = shift;
    return [];
}

sub isPreferred {
    return 1;
}

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbDE.pm b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbDE.pm
new file mode 100644
index 0000000..bc55111
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbDE.pm
@@ -0,0 +1,56 @@
package GCPlugins::GCfilms::GCthemoviedbDE;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCThemoviedb; + +{ + + package GCPlugins::GCfilms::GCPluginThemoviedbDE; + + use base qw(GCPlugins::GCfilms::GCPluginThemoviedb); + use XML::Simple; + + sub siteLanguage + { + my $self = shift; + + return 'de'; + } + + sub getName + { + return "The Movie DB (DE)"; + } + + sub getLang + { + return 'DE'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbES.pm b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbES.pm new file mode 100644 index 0000000..77dc03a --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbES.pm @@ -0,0 +1,56 @@ +package GCPlugins::GCfilms::GCthemoviedbES; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCThemoviedb; + +{ + + package GCPlugins::GCfilms::GCPluginThemoviedbES; + + use base qw(GCPlugins::GCfilms::GCPluginThemoviedb); + use XML::Simple; + + sub siteLanguage + { + my $self = shift; + + return 'es'; + } + + sub getName + { + return "The Movie DB (ES)"; + } + + sub getLang + { + return 'ES'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbFR.pm b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbFR.pm new file mode 100644 index 0000000..624b64f --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbFR.pm @@ -0,0 +1,56 @@ +package GCPlugins::GCfilms::GCthemoviedbFR; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCThemoviedb; + +{ + + package GCPlugins::GCfilms::GCPluginThemoviedbFR; + + use base qw(GCPlugins::GCfilms::GCPluginThemoviedb); + use XML::Simple; + + sub siteLanguage + { + my $self = shift; + + return 'fr'; + } + + sub getName + { + return "The Movie DB (FR)"; + } + + sub getLang + { + return 'FR'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCfilmsAmazonCommon.pm b/lib/gcstar/GCPlugins/GCfilms/GCfilmsAmazonCommon.pm new file mode 100644 index 0000000..a7178fa --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCfilmsAmazonCommon.pm @@ -0,0 +1,59 @@ +package GCPlugins::GCfilms::GCfilmsAmazonCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsCommon; +use GCPlugins::GCstar::GCAmazonCommon; + +{ + package GCPlugins::GCfilms::GCfilmsAmazonPluginsBase; + + use base ('GCPlugins::GCfilms::GCfilmsPluginsBase', 'GCPlugins::GCstar::GCPluginAmazonCommon'); + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{codeField} = ''; + $self->{searchType} = 'dvd'; + + return $self; + } + + sub getSearchFieldsArray + { + return ['title']; + } + + sub getEanField + { + return 'title'; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCfilmsCommon.pm b/lib/gcstar/GCPlugins/GCfilms/GCfilmsCommon.pm new file mode 100644 index 0000000..d67952f --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCfilmsCommon.pm @@ -0,0 +1,70 @@ +package GCPlugins::GCfilms::GCfilmsCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +our $MAX_ACTORS = 30; +our $MAX_DIRECTORS = 4; + +use GCPlugins::GCPluginsBase; + +{ + + package GCPlugins::GCfilms::GCfilmsPluginsBase; + + use base qw(GCPluginParser); + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + return $self; + } + + sub getSearchFieldsArray + { + return ['title']; + } + + sub loadUrl + { + my ($self, $url) = @_; + + $self->{actorsCounter} = 0; + $self->{directorCounter} = 0; + $self->SUPER::loadUrl($url); + + if (!$self->{curInfo}->{title} && $self->{curInfo}->{original}) + { + $self->{curInfo}->{title} = $self->{curInfo}->{original}; + $self->{curInfo}->{original} = ''; + } + return $self->{curInfo}; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCgames/GCAlapage.pm b/lib/gcstar/GCPlugins/GCgames/GCAlapage.pm new file mode 100644 index 0000000..0d21a94 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCAlapage.pm @@ -0,0 +1,262 @@ +package GCPlugins::GCgames::GCAlapage;
+
+###################################################
+#
+# Copyright 2005-2011 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCgames::GCgamesCommon;
+
+{
+ package GCPlugins::GCgames::GCPluginAlapage;
+
+ use base 'GCPlugins::GCgames::GCgamesPluginsBase';
+
+    # Start-tag callback.
+    #
+    # Always increments the nesting counter kept in $self->{inside} for the
+    # tag, then reacts according to the current parsing mode:
+    #   * parsingList : opens a new result on each "infosProduit" box, takes
+    #                   url/name from the next link and arms platform capture.
+    #   * parsingTips : nothing is extracted.
+    #   * otherwise   : item page; arms name/description capture and records
+    #                   the picture URL in $self->{curInfo}->{boxpic}.
+    sub start
+    {
+        my ($self, $tag, $attrs) = @_;
+
+        $self->{inside}->{$tag}++;
+
+        if ($self->{parsingList})
+        {
+            if ($tag eq 'div')
+            {
+                # A product box starts a new entry in the result list.
+                if ($attrs->{class} eq 'infosProduit')
+                {
+                    $self->{itemIdx}++;
+                    $self->{isGame} = 1;
+                }
+            }
+            elsif ($tag eq 'a')
+            {
+                # The link seen while isGame is set carries url and name.
+                if ($self->{isGame})
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{url}  = $attrs->{href};
+                    $self->{itemsList}[$self->{itemIdx}]->{name} = $attrs->{title};
+                    $self->{isGame} = 0;
+                }
+            }
+            elsif ($tag eq 'span')
+            {
+                # Only arm platform capture while no platform is stored yet
+                # for the current entry.
+                if (   ($attrs->{class} eq 'liensAriane')
+                    && ($self->{isGame})
+                    && ($self->{itemsList}[$self->{itemIdx}]->{platform} eq ''))
+                {
+                    $self->{isPlatform} = 1;
+                }
+            }
+        }
+        elsif ($self->{parsingTips})
+        {
+            # Tips pages: nothing to pick up.
+        }
+        else
+        {
+            if ($tag eq 'h1')
+            {
+                $self->{isName} = 1
+                  if $attrs->{id} eq 'zm_name_description';
+            }
+            elsif ($tag eq 'div')
+            {
+                $self->{isDescription} = 1
+                  if $attrs->{id} eq 'zm_description_long';
+            }
+            elsif ($tag eq 'span')
+            {
+                # Big pictures wanted: the URL sits in the href_img attribute.
+                $self->{curInfo}->{boxpic} = $attrs->{href_img}
+                  if ($attrs->{rel} eq 'images nocount') && ($self->{bigPics});
+            }
+            elsif ($tag eq 'img')
+            {
+                # Small pictures wanted: use the main image source.
+                $self->{curInfo}->{boxpic} = $attrs->{src}
+                  if ($attrs->{id} eq 'zm_main_image') && !($self->{bigPics});
+            }
+        }
+    }
+
+    # End-tag callback: decrements the nesting counter that start()
+    # incremented for the same tag name.
+    sub end
+    {
+        my $self   = shift;
+        my $closed = shift;
+
+        $self->{inside}->{$closed}--;
+    }
+
+    # Text callback: stores the character data that start() asked for.
+    #
+    # List mode : when isPlatform is armed, the trimmed text (with "Sony "
+    #             removed and "Jeux PC" shortened to "PC") becomes the
+    #             platform of the current result and is remembered in
+    #             $self->{Save_plateforme}.
+    # Tips mode : ignored.
+    # Item mode : leading whitespace is stripped, then the text is stored as
+    #             the name (together with the remembered platform and, when
+    #             non-empty, $self->{ean}) or as the description.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isPlatform})
+            {
+                # Strip leading and trailing whitespace.
+                $origtext =~ s/^\s+//;
+                $origtext =~ s/\s+$//;
+
+                # Normalise the platform label.
+                $origtext =~ s/Sony //i;
+                $origtext =~ s/Jeux PC/PC/i;
+
+                $self->{itemsList}[$self->{itemIdx}]->{platform} = $origtext;
+                $self->{Save_plateforme} = $origtext;
+                $self->{isPlatform} = 0;
+            }
+        }
+        elsif ($self->{parsingTips})
+        {
+            # Nothing is read from tips pages.
+        }
+        else
+        {
+            # Strip leading whitespace.
+            $origtext =~ s/^\s+//;
+
+            if ($self->{isName})
+            {
+                $self->{curInfo}->{name}     = $origtext;
+                $self->{curInfo}->{platform} = $self->{Save_plateforme};
+                $self->{isName} = 0;
+
+                # An EAN is only known when the search itself was by EAN.
+                if ($self->{ean} ne '')
+                {
+                    $self->{curInfo}->{ean} = $self->{ean};
+                }
+            }
+            elsif ($self->{isDescription})
+            {
+                # Turn non-breaking spaces, whether still written as the
+                # &nbsp; entity or already decoded to U+00A0, into ordinary
+                # spaces. A plain space-for-space substitution does nothing.
+                $origtext =~ s/(?:&nbsp;|\x{A0})/ /g;
+                $self->{curInfo}->{description} = $origtext;
+                $self->{isDescription} = 0;
+            }
+        }
+    }
+
+    # This plugin provides no tips URL: always returns nothing
+    # (undef in scalar context, the empty list in list context).
+    sub getTipsUrl
+    {
+        my ($self) = @_;
+
+        return;
+    }
+
+    # Constructor. Builds the object through the parent class, re-blesses it
+    # into the requested class, declares in hasField which fields carry a 1
+    # (name, platform) or a 0 (released, genre), and resets every parsing
+    # flag used by start()/text().
+    sub new
+    {
+        my ($proto) = @_;
+        my $class = ref($proto) || $proto;
+
+        my $self = $class->SUPER::new();
+        bless($self, $class);
+
+        $self->{hasField} = {
+            name     => 1,
+            platform => 1,
+            released => 0,
+            genre    => 0,
+        };
+
+        # Parser state: boolean flags first, then remembered strings.
+        $self->{$_} = 0 for qw(isName isGame isPlatform isDescription);
+        $self->{Save_plateforme} = '';
+        $self->{ean}             = '';
+
+        return $self;
+    }
+
+    # Cleans the raw HTML of an item page before it is parsed.
+    #
+    # Search-result pages (parsingList set) are returned untouched. For any
+    # other page, line breaks and bold/italic tags are removed, <p> becomes a
+    # newline, and typographic characters are replaced by plain ASCII.
+    #
+    # Parameters: $html - page content as a string.
+    # Returns:    the cleaned string.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        return $html if $self->{parsingList};
+
+        # Tags that are simply dropped (matched case-insensitively).
+        for my $tag ('<br>', '<br />', '<b>', '</b>', '<i>', '</i>')
+        {
+            $html =~ s|\Q$tag\E||gi;
+        }
+        $html =~ s|<p>|\n|gi;
+        $html =~ s|</p>||gi;
+
+        # Typographic characters, both as Unicode code points and as the
+        # raw 0x80-0x9F values found in Windows-1252 text. These rules are
+        # case-sensitive on purpose: with /i the lower-case ligature rule
+        # also matched the upper-case one and turned it into "oe".
+        my @typography = (
+            [ qr/\x{92}/,   q{'}   ],    # raw right single quote
+            [ qr/\x{2019}/, q{'}   ],    # right single quotation mark
+            [ qr/\x{2022}/, q{*}   ],    # bullet
+            [ qr/\x{152}/,  q{OE}  ],    # capital OE ligature
+            [ qr/\x{153}/,  q{oe}  ],    # small oe ligature
+            [ qr/\x{2026}/, q{...} ],    # horizontal ellipsis
+            [ qr/\x{85}/,   q{...} ],    # raw ellipsis
+            [ qr/\x{8C}/,   q{OE}  ],    # raw capital OE ligature
+            [ qr/\x{9C}/,   q{oe}  ],    # raw small oe ligature
+        );
+        for my $rule (@typography)
+        {
+            my ($pattern, $plain) = @{$rule};
+            $html =~ s/$pattern/$plain/g;
+        }
+
+        return $html;
+    }
+
+    # Builds the search URL for $word.
+    #
+    # Side effect: remembers $word in $self->{ean} when the search is made on
+    # the 'ean' field, and clears it otherwise; text() copies that value into
+    # the fetched item.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        $self->{ean} = ($self->{searchField} eq 'ean') ? $word : '';
+
+        return 'http://search.alapage.com/search?a=8584451-0-0&s=' . $word;
+    }
+
+    # Item URLs need no rewriting for this site: the URL is handed back as is.
+    sub getItemUrl
+    {
+        my (undef, $url) = @_;
+
+        return $url;
+    }
+
+    # Name of this plugin.
+    sub getName
+    {
+        my $name = 'Alapage';
+        return $name;
+    }
+
+    # Character set reported for this site's pages.
+    # (A "UTF-8" return was left disabled in the original code.)
+    sub getCharset
+    {
+        my ($self) = @_;
+
+        return "ISO-8859-15";
+    }
+
+    # Author credited for this plugin.
+    sub getAuthor
+    {
+        my $author = 'TPF';
+        return $author;
+    }
+
+    # Language code of the site handled by this plugin.
+    sub getLang
+    {
+        my $lang = 'FR';
+        return $lang;
+    }
+
+    # Fields a search may be made on, as a new array reference:
+    # 'ean' first, then 'name'.
+    sub getSearchFieldsArray
+    {
+        my @fields = ('ean', 'name');
+        return [@fields];
+    }
+
+    # Default file suffix for pictures: '.jpg'.
+    sub getDefaultPictureSuffix
+    {
+        my $suffix = '.jpg';
+        return $suffix;
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCgames/GCAmazon.pm b/lib/gcstar/GCPlugins/GCgames/GCAmazon.pm new file mode 100644 index 0000000..36b7890 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCAmazon.pm @@ -0,0 +1,115 @@ +package GCPlugins::GCgames::GCAmazon;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCgames::GCgamesAmazonCommon;
+
+{
+ package GCPlugins::GCgames::GCPluginAmazon;
+
+ use base 'GCPlugins::GCgames::GCgamesAmazonPluginsBase';
+
+    # Constructor. Builds the object through the parent class, re-blesses it
+    # into the requested class and sets $self->{suffix} to 'com'
+    # (presumably the Amazon domain suffix read by the shared base class;
+    # confirm there).
+    sub new
+    {
+        my ($proto) = @_;
+        my $class = ref($proto) || $proto;
+
+        my $self = $class->SUPER::new();
+        bless($self, $class);
+        $self->{suffix} = 'com';
+
+        return $self;
+    }
+
+    # Rewrites the raw HTML of an item page so that the fields of interest
+    # are delimited by private marker tags (<tpfdescription>,
+    # <tpfdateparution>, <tpfplateforme>, <tpfcouverture>, <tpfscreenshotN>)
+    # and stray formatting is flattened to plain text.
+    #
+    # Search-result pages (parsingList set) are returned untouched.
+    #
+    # Parameters: $html - page content as a string.
+    # Returns:    the rewritten string.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        return $html if $self->{parsingList};
+
+        # Mark the first "Product Description" heading.
+        $html =~ s/>Product Description</><tpfdescription><\/tpfdescription></;
+
+        # The description may itself contain HTML, so everything from the
+        # marker onwards gets its block-level tags flattened first.
+        my $found = index($html, '<tpfdescription>');
+        if ($found >= 0)
+        {
+            my $head = substr($html, 0, $found);
+            my $tail = substr($html, $found);
+
+            $tail =~ s|</li>||gi;
+            $tail =~ s|<p>|\n\n|gi;
+            $tail =~ s|</p>||gi;
+            $tail =~ s|<ul>|\n|gi;
+            $tail =~ s|</ul>|\n\n|gi;
+            $tail =~ s|<strong>||gi;
+            $tail =~ s|</strong>||gi;
+            $tail =~ s|<em>||gi;
+            $tail =~ s|</em>||gi;
+
+            $html = $head . $tail;
+        }
+
+        # Field markers. These must run before the generic <b> stripping
+        # below, because the platform label is matched through its <b> tags.
+        $html =~ s/Release Date :</<tpfdateparution></gi;
+        $html =~ s/Release Date:</<tpfdateparution></gi;
+        $html =~ s/<b>Platform:<\/b> </<tpfplateforme><\/tpfplateforme></gi;
+        $html =~ s/<b>Platform:<\/b>/<tpfplateforme><\/tpfplateforme>/gi;
+        $html =~ s/registerImage\("original_image",/<\/script><tpfcouverture src=/gi;
+        $html =~ s/registerImage\("alt_image_1",/<\/script><tpfscreenshot1 src=/gi;
+        $html =~ s/registerImage\("alt_image_2",/<\/script><tpfscreenshot2 src=/gi;
+
+        # Remaining inline formatting.
+        $html =~ s|<b>||gi;
+        $html =~ s|</b>||gi;
+        $html =~ s|<i>||gi;
+        $html =~ s|</i>||gi;
+        $html =~ s|<li>|\n*|gi;
+        $html =~ s|<br>|\n|gi;
+        $html =~ s|<br />|\n|gi;
+
+        # Typographic characters, both as Unicode code points and as the
+        # raw 0x80-0x9F values found in Windows-1252 text. These rules are
+        # case-sensitive on purpose: with /i the lower-case ligature rule
+        # also matched the upper-case one and turned it into "oe".
+        my @typography = (
+            [ qr/\x{92}/,   q{'}   ],    # raw right single quote
+            [ qr/\x{2019}/, q{'}   ],    # right single quotation mark
+            [ qr/\x{2022}/, q{*}   ],    # bullet
+            [ qr/\x{152}/,  q{OE}  ],    # capital OE ligature
+            [ qr/\x{153}/,  q{oe}  ],    # small oe ligature
+            [ qr/\x{2026}/, q{...} ],    # horizontal ellipsis
+            [ qr/\x{85}/,   q{...} ],    # raw ellipsis
+            [ qr/\x{8C}/,   q{OE}  ],    # raw capital OE ligature
+            [ qr/\x{9C}/,   q{oe}  ],    # raw small oe ligature
+        );
+        for my $rule (@typography)
+        {
+            my ($pattern, $plain) = @{$rule};
+            $html =~ s/$pattern/$plain/g;
+        }
+
+        return $html;
+    }
+
+    # Name of this plugin.
+    sub getName
+    {
+        my $name = 'Amazon (US)';
+        return $name;
+    }
+
+    # Language code of the site handled by this plugin.
+    sub getLang
+    {
+        my $lang = 'EN';
+        return $lang;
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCgames/GCAmazonCA.pm b/lib/gcstar/GCPlugins/GCgames/GCAmazonCA.pm new file mode 100644 index 0000000..a54c4ef --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCAmazonCA.pm @@ -0,0 +1,115 @@ +package GCPlugins::GCgames::GCAmazonCA;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCgames::GCgamesAmazonCommon;
+
+{
+ package GCPlugins::GCgames::GCPluginAmazonCA;
+
+ use base 'GCPlugins::GCgames::GCgamesAmazonPluginsBase';
+
+    # Constructor. Builds the object through the parent class, re-blesses it
+    # into the requested class and sets $self->{suffix} to 'ca'
+    # (presumably the Amazon domain suffix read by the shared base class;
+    # confirm there).
+    sub new
+    {
+        my ($proto) = @_;
+        my $class = ref($proto) || $proto;
+
+        my $self = $class->SUPER::new();
+        bless($self, $class);
+        $self->{suffix} = 'ca';
+
+        return $self;
+    }
+
+    # Rewrites the raw HTML of an item page so that the fields of interest
+    # are delimited by private marker tags (<tpfdescription>,
+    # <tpfdateparution>, <tpfplateforme>, <tpfcouverture>, <tpfscreenshotN>)
+    # and stray formatting is flattened to plain text.
+    #
+    # Search-result pages (parsingList set) are returned untouched.
+    #
+    # Parameters: $html - page content as a string.
+    # Returns:    the rewritten string.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        return $html if $self->{parsingList};
+
+        # Mark the first "Product Description" heading.
+        $html =~ s/>Product Description</><tpfdescription><\/tpfdescription></;
+
+        # The description may itself contain HTML, so everything from the
+        # marker onwards gets its block-level tags flattened first.
+        my $found = index($html, '<tpfdescription>');
+        if ($found >= 0)
+        {
+            my $head = substr($html, 0, $found);
+            my $tail = substr($html, $found);
+
+            $tail =~ s|</li>||gi;
+            $tail =~ s|<p>|\n\n|gi;
+            $tail =~ s|</p>||gi;
+            $tail =~ s|<ul>|\n|gi;
+            $tail =~ s|</ul>|\n\n|gi;
+            $tail =~ s|<strong>||gi;
+            $tail =~ s|</strong>||gi;
+            $tail =~ s|<em>||gi;
+            $tail =~ s|</em>||gi;
+
+            $html = $head . $tail;
+        }
+
+        # Field markers. These must run before the generic <b> stripping
+        # below, because the platform label is matched through its <b> tags.
+        $html =~ s/Release Date :</<tpfdateparution></gi;
+        $html =~ s/Release Date:</<tpfdateparution></gi;
+        $html =~ s/<b>Platform:<\/b> </<tpfplateforme><\/tpfplateforme></gi;
+        $html =~ s/<b>Platform:<\/b>/<tpfplateforme><\/tpfplateforme>/gi;
+        $html =~ s/registerImage\("original_image",/<\/script><tpfcouverture src=/gi;
+        $html =~ s/registerImage\("alt_image_1",/<\/script><tpfscreenshot1 src=/gi;
+        $html =~ s/registerImage\("alt_image_2",/<\/script><tpfscreenshot2 src=/gi;
+
+        # Remaining inline formatting.
+        $html =~ s|<b>||gi;
+        $html =~ s|</b>||gi;
+        $html =~ s|<i>||gi;
+        $html =~ s|</i>||gi;
+        $html =~ s|<li>|\n*|gi;
+        $html =~ s|<br>|\n|gi;
+        $html =~ s|<br />|\n|gi;
+
+        # Typographic characters, both as Unicode code points and as the
+        # raw 0x80-0x9F values found in Windows-1252 text. These rules are
+        # case-sensitive on purpose: with /i the lower-case ligature rule
+        # also matched the upper-case one and turned it into "oe".
+        my @typography = (
+            [ qr/\x{92}/,   q{'}   ],    # raw right single quote
+            [ qr/\x{2019}/, q{'}   ],    # right single quotation mark
+            [ qr/\x{2022}/, q{*}   ],    # bullet
+            [ qr/\x{152}/,  q{OE}  ],    # capital OE ligature
+            [ qr/\x{153}/,  q{oe}  ],    # small oe ligature
+            [ qr/\x{2026}/, q{...} ],    # horizontal ellipsis
+            [ qr/\x{85}/,   q{...} ],    # raw ellipsis
+            [ qr/\x{8C}/,   q{OE}  ],    # raw capital OE ligature
+            [ qr/\x{9C}/,   q{oe}  ],    # raw small oe ligature
+        );
+        for my $rule (@typography)
+        {
+            my ($pattern, $plain) = @{$rule};
+            $html =~ s/$pattern/$plain/g;
+        }
+
+        return $html;
+    }
+
+    # Name of this plugin.
+    sub getName
+    {
+        my $name = 'Amazon (CA)';
+        return $name;
+    }
+
+    # Language code of the site handled by this plugin.
+    sub getLang
+    {
+        my $lang = 'EN';
+        return $lang;
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCgames/GCAmazonDE.pm b/lib/gcstar/GCPlugins/GCgames/GCAmazonDE.pm new file mode 100644 index 0000000..1dbc106 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCAmazonDE.pm @@ -0,0 +1,114 @@ +package GCPlugins::GCgames::GCAmazonDE; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCgames::GCgamesAmazonCommon; + +{ + package GCPlugins::GCgames::GCPluginAmazonDE; + + use base 'GCPlugins::GCgames::GCgamesAmazonPluginsBase'; + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{suffix} = 'de'; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + } + else + { + $html =~ s/>Produktbeschreibungen</><tpfdescription><\/tpfdescription></; + # Le descriptif pouvant contenir des balises html je le repere maintenant + my $found = index($html,"<tpfdescription>"); + if ( $found >= 0 ) + { + my $html2 = substr($html, $found,length($html)- $found); + + $html2 =~ s|</li>||gi; + $html2 =~ s|<p>|\n\n|gi; + $html2 =~ s|</p>||gi; + $html2 =~ 
s|<ul>|\n|gi; + $html2 =~ s|</ul>|\n\n|gi; + $html2 =~ s|<strong>||gi; + $html2 =~ s|</strong>||gi; + $html2 =~ s|<em>||gi; + $html2 =~ s|</em>||gi; + + $html = substr($html, 0, $found) . $html2 ; + } + + $html =~ s/Erscheinungsdatum :</<tpfdateparution></gi; + $html =~ s/Erscheinungsdatum:</<tpfdateparution></gi; + $html =~ s/<b>Plattform:<\/b> </<tpfplateforme><\/tpfplateforme></gi; + $html =~ s/<b>Plattform:<\/b>/<tpfplateforme><\/tpfplateforme>/gi; + $html =~ s/registerImage\("original_image",/<\/script><tpfcouverture src=/gi; + $html =~ s/registerImage\("alt_image_1",/<\/script><tpfscreenshot1 src=/gi; + $html =~ s/registerImage\("alt_image_2",/<\/script><tpfscreenshot2 src=/gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<li>|*|gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|\x{92}|'|gi; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + $html =~ s|…|...|gi; + $html =~ s|\x{85}|...|gi; + $html =~ s|\x{8C}|OE|gi; + $html =~ s|\x{9C}|oe|gi; + + } + return $html; + } + + sub getName + { + return 'Amazon (DE)'; + } + + sub getLang + { + return 'DE'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCgames/GCAmazonFR.pm b/lib/gcstar/GCPlugins/GCgames/GCAmazonFR.pm new file mode 100644 index 0000000..1bceb23 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCAmazonFR.pm @@ -0,0 +1,118 @@ +package GCPlugins::GCgames::GCAmazonFR; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCgames::GCgamesAmazonCommon; + +{ + package GCPlugins::GCgames::GCPluginAmazonFR; + + use base 'GCPlugins::GCgames::GCgamesAmazonPluginsBase'; + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{suffix} = 'fr'; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + } + else + { + $html =~ s/>Description du produit</><tpfdescription><\/tpfdescription></; + $html =~ s/>Descriptions du produit</><tpfdescription><\/tpfdescription></; + # Le descriptif pouvant contenir des balises html je le repere maintenant + my $found = index($html,"<tpfdescription>"); + if ( $found >= 0 ) + { + my $html2 = substr($html, $found,length($html)- $found); + + $html2 =~ s|</li>||gi; + $html2 =~ s|<p>|\n\n|gi; + $html2 =~ s|</p>||gi; + $html2 =~ s|<ul>|\n|gi; + $html2 =~ s|</ul>|\n\n|gi; + $html2 =~ s|<strong>||gi; + $html2 =~ s|</strong>||gi; + $html2 =~ s|<em>||gi; + $html2 =~ s|</em>||gi; + + $html = substr($html, 0, $found) . 
$html2 ; + } + + $html =~ s/Date de parution :</<tpfdateparution></gi; + $html =~ s/Date de parution:</<tpfdateparution></gi; + $html =~ s/<b>Plate-forme:<\/b> </<tpfplateforme><\/tpfplateforme></gi; + $html =~ s/<b>Plate-forme:<\/b>/<tpfplateforme><\/tpfplateforme>/gi; + $html =~ s/registerImage\("original_image",/<\/script><tpfcouverture src=/gi; + $html =~ s/registerImage\("alt_image_1",/<\/script><tpfscreenshot1 src=/gi; + $html =~ s/registerImage\("alt_image_2",/<\/script><tpfscreenshot2 src=/gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<li>|*|gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|\x{92}|'|gi; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + $html =~ s|œ|oe|gi; + $html =~ s|…|...|gi; + $html =~ s|\x{85}|...|gi; + $html =~ s|\x{8C}|OE|gi; + $html =~ s|\x{9C}|oe|gi; + # Bug sur Shadow of Memories pour PS2 + $html =~ s|Actio\)n|Action|gi; + + } + return $html; + } + + sub getName + { + return 'Amazon (FR)'; + } + + sub getLang + { + return 'FR'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCgames/GCAmazonJP.pm b/lib/gcstar/GCPlugins/GCgames/GCAmazonJP.pm new file mode 100644 index 0000000..80824ec --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCAmazonJP.pm @@ -0,0 +1,120 @@ +package GCPlugins::GCgames::GCAmazonJP; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCgames::GCgamesAmazonCommon; + +{ + package GCPlugins::GCgames::GCPluginAmazonJP; + + use base 'GCPlugins::GCgames::GCgamesAmazonPluginsBase'; + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{suffix} = 'co.jp'; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + } + else + { + $html =~ s/>商品の説明</><tpfdescription><\/tpfdescription></; + # Le descriptif pouvant contenir des balises html je le repere maintenant + my $found = index($html,"<tpfdescription>"); + if ( $found >= 0 ) + { + my $html2 = substr($html, $found,length($html)- $found); + + $html2 =~ s|</li>||gi; + $html2 =~ s|<p>|\n\n|gi; + $html2 =~ s|</p>||gi; + $html2 =~ s|<ul>|\n|gi; + $html2 =~ s|</ul>|\n\n|gi; + $html2 =~ s|<strong>||gi; + $html2 =~ s|</strong>||gi; + $html2 =~ s|<em>||gi; + $html2 =~ s|</em>||gi; + + $html = substr($html, 0, $found) . 
$html2 ; + } + + $html =~ s/発売日 :</<tpfdateparution></gi; + $html =~ s/発売日:</<tpfdateparution></gi; + $html =~ s/<b>プラットフォーム:<\/b> </<tpfplateforme><\/tpfplateforme></gi; + $html =~ s/<b>プラットフォーム:<\/b>/<tpfplateforme><\/tpfplateforme>/gi; + $html =~ s/registerImage\("original_image",/<\/script><tpfcouverture src=/gi; + $html =~ s/registerImage\("alt_image_1",/<\/script><tpfscreenshot1 src=/gi; + $html =~ s/registerImage\("alt_image_2",/<\/script><tpfscreenshot2 src=/gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<li>|*|gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|\x{92}|'|gi; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + $html =~ s|…|...|gi; + $html =~ s|\x{85}|...|gi; + $html =~ s|\x{8C}|OE|gi; + $html =~ s|\x{9C}|oe|gi; + + } + return $html; + } + + sub getName + { + return 'Amazon (JP)'; + } + + sub getCharset + { + my $self = shift; + return "SHIFT_JIS"; + } + + sub getLang + { + return 'JP'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCgames/GCAmazonUK.pm b/lib/gcstar/GCPlugins/GCgames/GCAmazonUK.pm new file mode 100644 index 0000000..5108e8c --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCAmazonUK.pm @@ -0,0 +1,115 @@ +package GCPlugins::GCgames::GCAmazonUK; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCgames::GCgamesAmazonCommon; + +{ + package GCPlugins::GCgames::GCPluginAmazonUK; + + use base 'GCPlugins::GCgames::GCgamesAmazonPluginsBase'; + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{suffix} = 'co.uk'; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + } + else + { + $html =~ s/>Product Description</><tpfdescription><\/tpfdescription></; + $html =~ s/>Reviews</><tpfdescription><\/tpfdescription></; + # Le descriptif pouvant contenir des balises html je le repere maintenant + my $found = index($html,"<tpfdescription>"); + if ( $found >= 0 ) + { + my $html2 = substr($html, $found,length($html)- $found); + + $html2 =~ s|</li>||gi; + $html2 =~ s|<p>|\n\n|gi; + $html2 =~ s|</p>||gi; + $html2 =~ s|<ul>|\n|gi; + $html2 =~ s|</ul>|\n\n|gi; + $html2 =~ s|<strong>||gi; + $html2 =~ s|</strong>||gi; + $html2 =~ s|<em>||gi; + $html2 =~ s|</em>||gi; + + $html = substr($html, 0, $found) . 
$html2 ; + } + + $html =~ s/Release Date :</<tpfdateparution></gi; + $html =~ s/Release Date:</<tpfdateparution></gi; + $html =~ s/<b>Platform:<\/b> </<tpfplateforme><\/tpfplateforme></gi; + $html =~ s/<b>Platform:<\/b>/<tpfplateforme><\/tpfplateforme>/gi; + $html =~ s/registerImage\("original_image",/<\/script><tpfcouverture src=/gi; + $html =~ s/registerImage\("alt_image_1",/<\/script><tpfscreenshot1 src=/gi; + $html =~ s/registerImage\("alt_image_2",/<\/script><tpfscreenshot2 src=/gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<li>|*|gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|\x{92}|'|gi; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + $html =~ s|…|...|gi; + $html =~ s|\x{85}|...|gi; + $html =~ s|\x{8C}|OE|gi; + $html =~ s|\x{9C}|oe|gi; + + } + return $html; + } + + sub getName + { + return 'Amazon (UK)'; + } + + sub getLang + { + return 'EN'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCgames/GCDicoDuNet.pm b/lib/gcstar/GCPlugins/GCgames/GCDicoDuNet.pm new file mode 100644 index 0000000..fd130cf --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCDicoDuNet.pm @@ -0,0 +1,291 @@ +package GCPlugins::GCgames::GCDicoDuNet; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCgames::GCgamesCommon; + +{ + package GCPlugins::GCgames::GCPluginDicoDuNet; + + use base 'GCPlugins::GCgames::GCgamesPluginsBase'; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if (($tagname eq 'div') && ($attr->{class} eq 'cat_produit')) + { + $self->{isGame} = 1 ; + $self->{isUrl} = 1 ; + } + elsif (($tagname eq 'a') && ($self->{isUrl}) && ($self->{isGame})) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{isUrl} = 0 ; + } + elsif (($tagname eq 'strong') && ($self->{isGame})) + { + $self->{isName} = 1 ; + $self->{isGame} = 0 ; + } + } + elsif ($self->{parsingTips}) + { + } + else + { + + if (($tagname eq 'h3') && ($attr->{class} eq 'produits')) + { + $self->{isGame} = 1 ; + } + elsif (($tagname eq 'span') && ($self->{isGame} eq 1) ) + { + $self->{isName} = 1 ; + $self->{isGame} = 2 ; + } + elsif (($tagname eq 'a') && ($self->{isGame} eq 2)) + { + $self->{isEditor} = 1 ; + $self->{isGame} = 0 ; + } + elsif ($tagname eq 'table') + { + $self->{isGame} = 0 ; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'vous_etes_ici')) + { + $self->{isPlatform} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{href} ne 'http://www.dicodunet.com/jeux-video/') && ($self->{isPlatform})) + { + $self->{isPlatform} = 2 ; + } + elsif (($tagname eq 'a') && (index($attr->{href},"www.dicodunet.com/jeux-video/img/") >= 0) && ($self->{curInfo}->{boxpic} eq '')) + { + my $html = $self->loadPage($attr->{href}, 0, 1); + my $found = index($html,"<h3 class=\"produits\">"); + if ( $found >= 0 ) + { + $html = substr($html, $found +length('<h3 
class="produits">'),length($html)- $found -length('<h3 class="produits">')); + + my $found = index($html,"<img src=\""); + if ( $found >= 0 ) + { + $html = substr($html, $found +length('<img src="'),length($html)- $found -length('<img src="')); + $html = substr($html, 0,index($html,"\"")); + + $self->{curInfo}->{boxpic} = $html; + } + } + + } + elsif ($tagname eq 'tpfdateparution') + { + $self->{isDate} = 1 ; + } + elsif ($tagname eq 'tpfean') + { + $self->{isEan} = 1 ; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isName}) + { + $self->{itemsList}[$self->{itemIdx}]->{name} = $origtext; + $self->{isName} = 0; + + } + } + elsif ($self->{parsingTips}) + { + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + if ($self->{isName}) + { + $self->{curInfo}->{name} = $origtext; + $self->{isName} = 0 ; + } + elsif ($self->{isEditor}) + { + $self->{curInfo}->{editor} = $origtext; + $self->{isEditor} = 0 ; + } + elsif ($self->{isPlatform} eq 2) + { + $origtext =~ s/PlayStation 2/Playstation 2/i; + $origtext =~ s/Jeux PC/PC/i; + $origtext =~ s/Jeux Mac/MAC/i; + + if (($self->{curInfo}->{platform} eq '') && ($origtext ne '')) + { + $self->{curInfo}->{platform} = $origtext; + } + elsif ($origtext ne '') + { + $self->{curInfo}->{platform} .= ', '; + $self->{curInfo}->{platform} .= $origtext; + } + $self->{isPlatform} = 0; + } + elsif ($self->{isDate}) + { + $self->{curInfo}->{released} = $origtext; + $self->{curInfo}->{released} =~ s|([0-9]*)-([0-9]*)-([0-9]*)|$3.'/'.$2.'/'.$1|e; + $self->{isDate} = 0 ; + } + elsif ($self->{isEan}) + { + $self->{curInfo}->{ean} = $origtext; + $self->{isEan} = 0 ; + } + + } + } + + sub getTipsUrl + { + my $self = shift; + + return ; + + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + 
$self->{hasField} = { + name => 1, + platform => 0, + genre => 0, + released => 0 + }; + + $self->{isName} = 0; + $self->{isGame} = 0; + $self->{isUrl} = 0; + $self->{isPlatform} = 0; + $self->{isEditor} = 0; + $self->{isDate} = 0; + $self->{isEan} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + } + else + { + my $found = index($html,"class=\"produits_box\""); + if ( $found >= 0 ) + { + $html = substr($html, 0, $found); + } + + $html =~ s/Version sortie le /<tpfdateparution>/gi; + $html =~ s/Code EAN : /<tpfean>/gi; + $html =~ s|\x{92}|'|gi; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + } + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return ('http://www.dicodunet.com/jeux-video/recherche.php', ["q" => "$word"] ); + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url; + return 'http://www.dicodunet.com/'; + } + + sub getName + { + return 'DicoDuNet'; + } + + sub getCharset + { + my $self = shift; + #return "UTF-8"; + return "ISO-8859-15"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'FR'; + } + + sub getSearchFieldsArray + { + return ['name']; + } +} + +1; + diff --git a/lib/gcstar/GCPlugins/GCgames/GCGameSpot.pm b/lib/gcstar/GCPlugins/GCgames/GCGameSpot.pm new file mode 100644 index 0000000..c5870b4 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCGameSpot.pm @@ -0,0 +1,490 @@ +package GCPlugins::GCgames::GCGameSpot;
+
+###################################################
+#
+# Copyright 2005-2011 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCgames::GCgamesCommon;
+
+{
+ package GCPlugins::GCgames::GCPluginGameSpot;
+
+ use base 'GCPlugins::GCgames::GCgamesPluginsBase';
+ use Text::Wrap;
+
+ # Start-tag callback (signature follows HTML::Parser's start handler).
+ # It only flips state flags and stores attribute values; the matching text
+ # is collected later by text(). Three modes are handled:
+ #   - parsingList: search-result page, builds $self->{itemsList}
+ #   - parsingTips: cheats page, tracks the current section and table cells
+ #   - otherwise:   item page, fills $self->{curInfo}
+ sub start
+ {
+ my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+ # Per-tag open counter; text() consults it (e.g. $self->{inside}->{h2}).
+ $self->{inside}->{$tagname}++;
+ if ($self->{parsingList})
+ {
+ if ($tagname eq 'div')
+ {
+ # A div whose class contains "result_title" opens one search result.
+ $self->{isGame} = 1
+ if $attr->{class} =~ /result_title/;
+ }
+ elsif ($tagname eq 'tpfdatetpf')
+ {
+ # Marker tag inserted by preProcess in front of "Release Date".
+ $self->{isDate} = 1;
+ }
+ elsif (($tagname eq 'a') && ($self->{isGame}))
+ {
+ # Link inside a result: start a new item; its text is the title.
+ $self->{itemIdx}++;
+ $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+ $self->{isName} = 1;
+ }
+ }
+ elsif ($self->{parsingTips})
+ {
+ if (($tagname eq 'h2') && ($attr->{class} eq 'module_title'))
+ {
+ # Section heading; text() maps its label to $self->{section}.
+ $self->{isSection} = 1;
+ }
+ elsif (($tagname eq 'th') && ($attr->{scope} eq 'row') && ($attr->{class} eq 'code') && ($self->{section} ne ''))
+ {
+ # Row header cell holding the cheat code / unlockable name.
+ $self->{isCheat} = 1;
+ }
+ elsif (($tagname eq 'td') && ($attr->{class} eq 'effect') && ($self->{section} ne ''))
+ {
+ # Cell holding the effect / how-to-unlock description.
+ $self->{isDesc} = 1;
+ }
+ elsif (($tagname eq 'h3') && ($attr->{class} eq 'cheatCodeTitle') && ($self->{section} eq 'Secrets'))
+ {
+ # Separate consecutive secrets with an extra newline.
+ $self->{curInfo}->{secrets} .= "\n" if $self->{curInfo}->{secrets};
+ }
+ elsif ($tagname eq 'tpfdebuttpf')
+ {
+ # Marker tag that the (currently commented-out) walkthrough import
+ # in preProcess would insert; its content is stored as secrets.
+ $self->{section} = 'Secrets';
+ }
+ elsif (($tagname eq 'div') && ($attr->{class} eq 'head'))
+ {
+ # A "head" div ends the current section.
+ $self->{section} = '';
+ }
+ elsif ($tagname eq 'head')
+ {
+ # Start of the tips document: forget the tips URL.
+ $self->{urlTips} = '';
+ }
+ }
+ else
+ {
+ if ($tagname eq 'img')
+ {
+ # A "no_preview" image stores a single space in boxpic, which is
+ # true in boolean context and so blocks the assignment below.
+ $self->{curInfo}->{boxpic} = ' '
+ if $attr->{src} =~ /no_preview/;
+ if ((! $self->{curInfo}->{boxpic}) && ($attr->{src} =~ /[^xo]boxs[^c]/))
+ {
+ $self->{curInfo}->{boxpic} = $attr->{src};
+ }
+ if ($attr->{src} =~ /thumb/)
+ {
+ # Turn thumbnail URLs thumb001/thumb002 into screen001/screen002
+ # and keep the first one seen for each of the two slots.
+ my $pic = $attr->{src};
+ $pic =~ s/thumb00([0-9])/screen00$1/;
+ if ($1 && ($1 <= 2))
+ {
+ $self->{curInfo}->{'screenshot'.$1} = $pic
+ if ! $self->{curInfo}->{'screenshot'.$1};
+ }
+ }
+ }
+ elsif (($tagname eq 'div') && ($attr->{class} eq 'boxshot'))
+ {
+ $self->{isBox} = 1;
+ }
+ elsif (($tagname eq 'a') && ($self->{isBox} eq 1))
+ {
+ # Follow the box-shot link and read the main image URL from that
+ # page. NOTE(review): meaning of loadPage's extra (0, 1) arguments
+ # is defined outside this file - confirm.
+ my $html = $self->loadPage($self->getItemUrl($attr->{href}), 0, 1);
+ my $found = index($html,"id=\"main_image\" src=\"");
+ if ( $found >= 0 )
+ {
+ $html = substr($html, $found +length('id="main_image" src="'),length($html)- $found -length('id="main_image" src="'));
+
+ # Everything up to the next double quote is the image URL.
+ my @array = split(/"/,$html);
+ #"
+ if ($self->{bigPics})
+ {
+ $self->{curInfo}->{boxpic} = $array[0];
+ }
+ # The back cover URL is derived by renaming _front to _back.
+ $self->{curInfo}->{backpic} = $array[0];
+ $self->{curInfo}->{backpic} =~ s/_front/_back/;
+ }
+ $self->{isBox} = 0;
+ }
+ elsif (($tagname eq 'h1') && ($attr->{class} eq 'productPageTitle'))
+ {
+ # Only the first title is used as the item name.
+ $self->{isName} = 1 if ! $self->{curInfo}->{name};
+ }
+ elsif (($tagname eq 'meta') && ($attr->{name} eq 'description'))
+ {
+ $self->{curInfo}->{description} = $attr->{content};
+ }
+ elsif (($tagname eq 'li') && ($attr->{class} =~ /activeFilter/))
+ {
+ # text() sets exclusive to 1 with the name; this clears it again.
+ $self->{curInfo}->{exclusive} = 0;
+ }
+ # The fields below use a two-step flag: 1 = container element seen,
+ # 2 = inner element seen, next text node is the value (see text()).
+ elsif (($tagname eq 'span') && ($attr->{class} eq 'reviewer'))
+ {
+ $self->{isRating} = 1;
+ }
+ elsif (($tagname eq 'a') && ($self->{isRating} eq 1))
+ {
+ $self->{isRating} = 2;
+ }
+ elsif (($tagname eq 'li') && ($attr->{class} eq 'moreStat play_info number_of_players'))
+ {
+ $self->{isPlayers} = 1;
+ }
+ elsif (($tagname eq 'p') && ($self->{isPlayers} eq 1))
+ {
+ $self->{isPlayers} = 2;
+ }
+ elsif (($tagname eq 'li') && ($attr->{class} eq 'publisher'))
+ {
+ $self->{isEditor} = 1;
+ }
+ elsif (($tagname eq 'a') && ($self->{isEditor} eq 1))
+ {
+ $self->{isEditor} = 2;
+ }
+ elsif (($tagname eq 'li') && ($attr->{class} eq 'developer'))
+ {
+ $self->{isDeveloper} = 1;
+ }
+ elsif (($tagname eq 'a') && ($self->{isDeveloper} eq 1))
+ {
+ $self->{isDeveloper} = 2;
+ }
+ elsif (($tagname eq 'li') && ($attr->{class} eq 'genre'))
+ {
+ $self->{isGenre} = 1;
+ }
+ elsif (($tagname eq 'a') && ($self->{isGenre}))
+ {
+ # The genre is read from the link's title attribute, not its text.
+ $self->{curInfo}->{genre} = $attr->{title};
+ $self->{isGenre} = 0;
+ }
+ elsif (($tagname eq 'li') && ($attr->{class} eq 'date'))
+ {
+ $self->{isReleased} = 1;
+ }
+ elsif (($tagname eq 'a') && ($self->{isReleased} eq 1))
+ {
+ $self->{isReleased} = 2;
+ }
+ elsif (($tagname eq 'a') && ($attr->{href} =~ /\/cheats\//) && ($attr->{class} eq 'navItemAction'))
+ {
+ # Site-relative link to the cheats page; getTipsUrl prefixes the host.
+ $self->{urlTips} = $attr->{href};
+ }
+ }
+ }
+
+ # End-tag callback: decrements the open-tag counter for $tagname and, while
+ # a result list is being parsed, leaves the current result when a div closes.
+ sub end
+ {
+     my ($self, $tagname) = @_;
+
+     $self->{inside}->{$tagname}--;
+
+     # Only list parsing reacts to closing tags; tips and item pages do not.
+     return unless $self->{parsingList};
+
+     $self->{isGame} = 0 if $tagname eq 'div';
+ }
+
+ # Text callback: stores $origtext into the field selected by the flags that
+ # start() raised.
+ #   - parsingList: item name/platform ("Name (Platform)") and release date
+ #   - parsingTips: cheat codes, unlockables and secrets, by section
+ #   - otherwise:   item page fields in $self->{curInfo}
+ # Unlike the previous version, regex captures are only used when the match
+ # succeeded, so a title without "(Platform)" or a date block without
+ # "Release Date:" no longer stores leftovers of an earlier match.
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     if ($self->{parsingList})
+     {
+         if ($self->{isName})
+         {
+             # Titles normally read "Name (Platform)"; fall back to the
+             # trimmed text with an empty platform when they do not.
+             my ($name, $platform) = ($origtext, '');
+             if ($origtext =~ /^(.*?)\s*\((.*?)\)\s*$/)
+             {
+                 ($name, $platform) = ($1, $2);
+             }
+             else
+             {
+                 $name =~ s/^\s+//;
+                 $name =~ s/\s+$//;
+             }
+             my $idx = $self->{itemIdx};
+             $self->{itemsList}[$idx]->{name} = $name;
+             $self->{itemsList}[$idx]->{platform} = $platform;
+             # The platform rides along in the URL; getItemUrl strips it again.
+             $self->{itemsList}[$idx]->{url} .= 'tpfplatformtpf' . $platform;
+             $self->{isName} = 0;
+         }
+         elsif ($self->{isDate})
+         {
+             # Skip dates seen before any item exists: writing to index -1
+             # of an empty list is a fatal error.
+             if (($self->{itemIdx} >= 0)
+                 && ($origtext =~ /^\s*Release Date:\s*(.*?)\s*$/ms))
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{released} = $1;
+             }
+             $self->{isDate} = 0;
+         }
+     }
+     elsif ($self->{parsingTips})
+     {
+         if (($self->{isSection} eq 1) && $self->{inside}->{h2})
+         {
+             # Map the heading label to the internal section name.
+             $self->{section} = 'Codes' if $origtext =~ /Cheat Codes$/;
+             $self->{section} = 'Unlockables' if $origtext =~ /Unlockables$/;
+             $self->{section} = 'Secrets' if $origtext =~ /Secrets$/;
+             $self->{section} = 'Secrets' if $origtext =~ /Easter Eggs$/;
+             $self->{isSection} = 0;
+         }
+         elsif (($self->{section} eq 'Codes') || ($self->{section} eq 'Unlockables'))
+         {
+             $origtext =~ s/^\s*//;
+             $origtext =~ s/\s*$//;
+             $Text::Wrap::columns = 80;
+             $origtext = Text::Wrap::wrap('', '', $origtext);
+
+             if ($self->{isCheat})
+             {
+                 # Codes are stored as [code, effect], unlockables as
+                 # [how-to, name]: the name cell lands in slot 1.
+                 $self->{tmpCheatLine} = [];
+                 if ($self->{section} eq 'Codes')
+                 {
+                     push @{$self->{tmpCheatLine}}, $origtext;
+                 }
+                 else
+                 {
+                     ${$self->{tmpCheatLine}}[1] = $origtext;
+                 }
+                 $self->{isCheat} = 0;
+             }
+             elsif ($self->{isDesc})
+             {
+                 if ($self->{section} eq 'Codes')
+                 {
+                     push @{$self->{tmpCheatLine}}, $origtext;
+                     push @{$self->{curInfo}->{code}}, $self->{tmpCheatLine};
+                 }
+                 else
+                 {
+                     ${$self->{tmpCheatLine}}[0] = $origtext;
+                     push @{$self->{curInfo}->{unlockable}}, $self->{tmpCheatLine};
+                 }
+                 $self->{tmpCheatLine} = [];
+                 $self->{isDesc} = 0;
+             }
+         }
+         elsif ($self->{section} eq 'Secrets')
+         {
+             # Secrets are accumulated as newline-separated free text.
+             $origtext =~ s/^\s*//;
+             $origtext =~ s/\s*$//;
+             return if !$origtext;
+             $self->{curInfo}->{secrets} .= "\n" if $self->{curInfo}->{secrets};
+             $self->{curInfo}->{secrets} .= $origtext;
+         }
+     }
+     else
+     {
+         if ($self->{isName})
+         {
+             $origtext =~ s/\n//g;
+             $self->{curInfo}->{name} = $origtext;
+             # Platform was remembered by getItemUrl from the list entry.
+             $self->{curInfo}->{platform} = $self->{url_plateforme};
+             $self->{curInfo}->{exclusive} = 1;
+             $self->{isName} = 0;
+         }
+         elsif ($self->{isRating} eq 2)
+         {
+             $self->{curInfo}->{ratingpress} = $origtext;
+             $self->{isRating} = 0;
+         }
+         else
+         {
+             # Remaining fields ignore whitespace-only text nodes.
+             $origtext =~ s/^\s*//;
+             $origtext =~ s/\s*$//;
+             return if !$origtext;
+             if ($self->{isReleased} eq 2)
+             {
+                 # Drop a trailing " X" single-character suffix.
+                 $origtext =~ s/ .$//;
+                 $self->{curInfo}->{released} = $origtext;
+                 $self->{isReleased} = 0;
+             }
+             elsif ($self->{isEditor} eq 2)
+             {
+                 $self->{curInfo}->{editor} = $origtext;
+                 $self->{isEditor} = 0;
+             }
+             elsif ($self->{isDeveloper} eq 2)
+             {
+                 $self->{curInfo}->{developer} = $origtext;
+                 $self->{isDeveloper} = 0;
+             }
+             elsif ($self->{isPlayers} eq 2)
+             {
+                 # Keep only the count: strip "Players (...)" and what follows.
+                 $origtext =~ s/(Players?)?\s*\(.*?$//;
+                 $self->{curInfo}->{players} = $origtext;
+                 $self->{isPlayers} = 0;
+             }
+         }
+     }
+ }
+
+ # Absolute URL of the cheats page: the site host followed by the
+ # site-relative path that start() stored in $self->{urlTips}.
+ sub getTipsUrl
+ {
+     my ($self) = @_;
+
+     my $host = 'http://www.gamespot.com';
+     return $host . $self->{urlTips};
+ }
+
+ # Constructor: builds the object through the parent class, declares which
+ # fields the result list provides and resets every parser state variable.
+ sub new
+ {
+     my ($proto) = @_;
+     my $class = ref($proto) ? ref($proto) : $proto;
+     my $self = bless($class->SUPER::new(), $class);
+
+     # Fields available for each entry of the search-result list.
+     $self->{hasField} = {
+         name => 1,
+         platform => 1,
+         released => 1,
+     };
+
+     # State flags raised by start() and consumed by text().
+     $self->{$_} = 0
+         for qw(isName isGame isDate isCheat isDesc isTip isRating
+                isSection isDeveloper isGenre isEditor isReleased
+                isPlayers isBox isExclu);
+
+     # String-valued state.
+     $self->{$_} = ''
+         for qw(section url_plateforme urlTips SaveUrl);
+
+     return $self;
+ }
+
+ # Rewrites the raw page before it is parsed and returns the new HTML.
+ #   - tips page:   unwraps <b> and <i> so their text is not split up
+ #   - result list: puts a <tpfdatetpf> marker tag before "Release Date"
+ #   - item page:   drops everything from "Similar Games" onwards
+ sub preProcess
+ {
+     my ($self, $html) = @_;
+
+     if ($self->{parsingTips})
+     {
+         for my $tag (qw(b i))
+         {
+             $html =~ s|<$tag>(.*?)</$tag>|$1|g;
+         }
+ ## Importing the online walkthrough is disabled: it takes too much time
+ # $html =~ s|<li class="guideAct"><a href="(.+)">Go to Online Walkthrough|'<tpfdebuttpf>' . $self->RecupSolution($1) . '<tpffintpf>'|ge;
+         return $html;
+     }
+
+     if ($self->{parsingList})
+     {
+         $html =~ s|Release Date|<tpfdatetpf>Release Date|g;
+         return $html;
+     }
+
+     my $cut = index($html, "Similar Games");
+     $html = substr($html, 0, $cut) if $cut >= 0;
+
+     return $html;
+ }
+
+ # Downloads a walkthrough page and returns its body, following the "next"
+ # links recursively and splicing each following page in place of its link.
+ #   $url - absolute URL of the walkthrough page
+ # The body starts at the first <h2>, or failing that at the author span, and
+ # stops just before the ' rel="next">' attribute of the next-page link.
+ # A missing end marker now leaves the page intact (it used to lose its last
+ # character), and the href capture stops at the closing quote.
+ sub RecupSolution
+ {
+     my ($self, $url) = @_;
+
+     my $html = $self->loadPage($url);
+
+     my $start = index($html, "<h2>");
+     $start = index($html, "<span class=\"author\">") if $start < 0;
+     $html = substr($html, $start) if $start >= 0;
+
+     my $end = index($html, " rel=\"next\">");
+     $html = substr($html, 0, $end) if $end >= 0;
+
+     # A link back to the guide index is not a further page: drop it.
+     $html =~ s|<a class="next" href="/gameguides.html"||g;
+     $html =~ s|<a class="next" href="([^"]+)"|$self->RecupSolution('http://www.gamespot.com'.$1)|ge;
+
+     return $html;
+ }
+
+ # Builds the search URL for $word (inserted as-is into the query string).
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     # Former endpoints, kept for reference:
+     #return 'http://www.gamespot.com/search.html?qs='.$word.'&sub=g&stype=11&type=11';
+     #return 'http://www.gamespot.com/search.html?qs=' .$word. '&tag=masthead%3Bsearch';
+     my $endpoint = 'http://www.gamespot.com/pages/search/solr_search_ajax.php';
+     my $options = '&type=game&offset=0&tags_only=false&sort=false';
+
+     return $endpoint . '?q=' . $word . $options;
+ }
+
+ # Turns a URL taken from the result list into an absolute item URL.
+ # text() appends 'tpfplatformtpf<platform>' to list URLs; that suffix is cut
+ # off here and the platform is remembered in $self->{url_plateforme}.
+ # URLs that do not mention gamespot.com get the site host prepended, so an
+ # empty URL yields the site root.
+ sub getItemUrl
+ {
+     my ($self, $url) = @_;
+
+     my $marker = 'tpfplatformtpf';
+     my $markerPos = index($url, $marker);
+     if ($markerPos >= 0)
+     {
+         $self->{url_plateforme} = substr($url, $markerPos + length($marker));
+         $url = substr($url, 0, $markerPos);
+     }
+
+     return $url if $url =~ /gamespot\.com/;
+     return 'http://www.gamespot.com' . $url;
+ }
+
+ # Name of this plugin.
+ sub getName
+ {
+     my $pluginName = 'GameSpot';
+     return $pluginName;
+ }
+
+ # Author credited for this plugin.
+ sub getAuthor
+ {
+     my $author = 'Tian';
+     return $author;
+ }
+
+ # Language code of the site's content.
+ sub getLang
+ {
+     my $language = 'EN';
+     return $language;
+ }
+
+ # Character set name reported for pages of this site.
+ sub getCharset
+ {
+     my $charset = "ISO-8859-1";
+     return $charset;
+ }
+ # Always returns 1 (presumably marks the plugin as a preferred source -
+ # confirm against the caller).
+ sub isPreferred
+ {
+     my $preferred = 1;
+     return $preferred;
+ }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCgames/GCJeuxVideoCom.pm b/lib/gcstar/GCPlugins/GCgames/GCJeuxVideoCom.pm new file mode 100644 index 0000000..560a582 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCJeuxVideoCom.pm @@ -0,0 +1,447 @@ +package GCPlugins::GCgames::GCJeuxVideoCom;
+
+###################################################
+#
+# Copyright 2005-2010 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCgames::GCgamesCommon;
+
+{
+ package GCPlugins::GCgames::GCPluginJeuxVideoCom;
+
+ use base 'GCPlugins::GCgames::GCgamesPluginsBase';
+
+ # Start-tag callback (signature follows HTML::Parser's start handler).
+ # Does nothing but count the tag once $self->{parsingEnded} is set.
+ #   - parsingList: collects result links (URL + platform) inside the
+ #                  new_mc / old_mc result containers
+ #   - parsingTips: drives the isTip state used by text():
+ #                  0 idle, 1 paragraph, 2 heading, 3 end of tips block,
+ #                  4 end of walkthrough
+ #   - otherwise:   item page: covers, screenshots, tips URL, player count
+ # Fixes over the previous version: the dots in ".gif" and "test.htm" are
+ # matched literally, and the player count is no longer truncated when no
+ # '<' follows it.
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+     $self->{inside}->{$tagname}++;
+     return if $self->{parsingEnded};
+     if ($self->{parsingList})
+     {
+         if (($tagname eq 'div') && (($attr->{id} eq 'new_mc') || ($attr->{id} eq 'old_mc')))
+         {
+             $self->{inResults} = 1;
+         }
+         elsif ($self->{inResults})
+         {
+             if ($tagname eq 'img')
+             {
+                 # The platform is the alt text of the icon before the link.
+                 $self->{currentPlatform} = $attr->{alt};
+             }
+             elsif (($tagname eq 'a') && ($attr->{href} =~ /^http/))
+             {
+                 $self->{itemIdx}++;
+                 $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+                 $self->{itemsList}[$self->{itemIdx}]->{platform} = $self->{currentPlatform};
+                 $self->{isGame} = 1;
+             }
+         }
+     }
+     elsif ($self->{parsingTips})
+     {
+         # States 3 and 4 are sticky: only the tpf* marker tags inserted by
+         # preProcess (or <head>) leave them.
+         my $blockEnded = ($self->{isTip} eq 3) || ($self->{isTip} eq 4);
+
+         if ($tagname eq 'tpfdebuttpf')
+         {
+             $self->{isTip} = 1;
+         }
+         elsif (($tagname eq 'h3') && ($attr->{class} eq 'titre_bloc') && ($self->{isTip} ne 4))
+         {
+             $self->{isTip} = 2;
+         }
+         elsif ((($tagname eq 'h3') || ($tagname eq 'h4') || ($tagname eq 'h5')) && !$blockEnded)
+         {
+             $self->{isTip} = 2;
+         }
+         elsif (($tagname eq 'p') && !$blockEnded)
+         {
+             $self->{isTip} = 1;
+         }
+         elsif ($tagname eq 'tpfstopsolution')
+         {
+             $self->{isTip} = 4;
+         }
+         elsif ($tagname eq 'tpffintpf')
+         {
+             $self->{isTip} = 3;
+         }
+         elsif ($tagname eq 'head')
+         {
+             $self->{isTip} = 0;
+             $self->{urlTips} = '';
+         }
+     }
+     else
+     {
+         if (($tagname eq 'meta') && ($attr->{property} eq 'og:image'))
+         {
+             # Derive front and back cover URLs from the og:image URL.
+             my $cover = $attr->{content};
+             $cover =~ s|(http://[^/]*)/([^i])|$1/images/$2|;
+             if ($self->{bigPics})
+             {
+                 $cover =~ s/-p(-|\.)/-g$1/;
+                 $cover =~ s/t(\.jpg)/$1/;
+             }
+             my $back = $cover;
+             if (!($back =~ s/-avant(-|\.)/-arriere$1/))
+             {
+                 $back =~ s/f(t?\.jpg)/r$1/;
+             }
+             $self->{curInfo}->{boxpic} = $cover;
+             $self->{curInfo}->{backpic} = $back;
+         }
+         elsif (($tagname eq 'li') && ($attr->{class} eq 'note_redac'))
+         {
+             $self->{is} = 'ratingpress';
+         }
+         elsif (($tagname eq 'div') && ($attr->{class} eq 'series_images'))
+         {
+             $self->{inScreenshots} = 1;
+         }
+         elsif (($tagname eq 'img') && ($self->{inScreenshots}))
+         {
+             # Fill the first free screenshot slot with the full-size JPEG
+             # variant of the thumbnail (".gif" -> ".jpg", "_m.jpg" -> ".jpg").
+             for my $slot ('screenshot1', 'screenshot2')
+             {
+                 next if $self->{curInfo}->{$slot};
+                 my $shot = $attr->{src};
+                 $shot =~ s/\.gif/.jpg/;
+                 $shot =~ s/_m\.jpg/\.jpg/;
+                 $self->{curInfo}->{$slot} = $shot;
+                 $self->{isScreen} = 0 if $slot eq 'screenshot2';
+                 last;
+             }
+         }
+         elsif (($attr->{href} =~ m{/(?:etajvhtm|cheats)/}) && ! ($self->{urlTips}))
+         {
+             # First link to a tips/cheats page becomes the tips URL.
+             $self->{urlTips} = $attr->{href};
+         }
+         elsif (($attr->{href} =~ m/test\.htm/) && ! ($self->{curInfo}->{players}))
+         {
+             # The player count is only given on the review page: fetch it.
+             my $html = $self->loadPage($attr->{href});
+
+             my $label = '<li><strong>Multijoueurs :</strong>';
+             my $found = index($html, $label);
+             if ( $found >= 0 )
+             {
+                 my $players = substr($html, $found + length($label));
+                 my $stop = index($players, "<");
+                 $players = substr($players, 0, $stop) if $stop >= 0;
+
+                 # Strip leading and trailing whitespace
+                 $players =~ s/^\s+//;
+                 $players =~ s/\s+$//;
+
+                 # "-" and "non" mean single player, "oui" multiplayer.
+                 $players =~ s/-/1/;
+                 $players =~ s/non/1/i;
+                 $players =~ s/oui/Multijoueurs/i;
+
+                 $self->{curInfo}->{players} = $players;
+             }
+         }
+     }
+ }
+
+ # End-tag callback: decrements the open-tag counter and, while a result
+ # list is being parsed, leaves the result container when a div closes.
+ # Nothing but the counter is updated once parsingEnded is set.
+ sub end
+ {
+     my ($self, $tagname) = @_;
+
+     $self->{inside}->{$tagname}--;
+     return if $self->{parsingEnded};
+
+     $self->{inResults} = 0
+         if $self->{parsingList} && ($tagname eq 'div');
+ }
+
+ # Text callback; ignored entirely once parsingEnded is set.
+ #   - parsingList: the text after a result link is the game name
+ #   - parsingTips: appends headings (isTip 2) and paragraphs (isTip 1) to
+ #                  curInfo->{secrets}
+ #   - otherwise:   item page: name/platform from the <h1>, then
+ #                  "label : value" pairs where the <strong> label selects the
+ #                  field ($self->{is}) that receives the next non-empty text
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     return if $self->{parsingEnded};
+
+     if ($self->{parsingList})
+     {
+         return unless $self->{isGame};
+         $self->{itemsList}[$self->{itemIdx}]->{name} = $origtext;
+         $self->{isGame} = 0;
+         return;
+     }
+
+     if ($self->{parsingTips})
+     {
+         # Strip leading and trailing whitespace
+         $origtext =~ s/^\s+//;
+         $origtext =~ s/\s+$//;
+
+         my $state = $self->{isTip};
+         if ($state eq 2)
+         {
+             # Heading: set apart from what precedes by a blank line.
+             $self->{curInfo}->{secrets} .= "\n\n" if $self->{curInfo}->{secrets};
+             $self->{curInfo}->{secrets} .= $origtext;
+             $self->{isTip} = 0;
+         }
+         elsif ($state eq 1)
+         {
+             # Paragraph: starts on a new line unless it is empty.
+             chomp($origtext);
+             $self->{curInfo}->{secrets} .= "\n"
+                 if ($self->{curInfo}->{secrets}) && ($origtext ne "");
+             $self->{curInfo}->{secrets} .= $origtext;
+             $self->{isTip} = 0;
+         }
+         return;
+     }
+
+     if ($self->{inside}->{h1})
+     {
+         if ($self->{inside}->{a})
+         {
+             $self->{curInfo}->{name} = $origtext;
+             $self->{curInfo}->{exclusive} = 1;
+         }
+         elsif ($self->{inside}->{span} && ($origtext !~ /^Fiche /))
+         {
+             # Platform follows the name as " - Platform".
+             $origtext =~ s/^\s*-?\s*//;
+             $self->{curInfo}->{platform} = $origtext;
+         }
+         return;
+     }
+
+     if ($self->{inside}->{strong})
+     {
+         # Label => field table, applied in order (a later match wins).
+         my @labels = (
+             [ qr/Sortie :/,          'released'    ],
+             [ qr/Sortie France :/,   'released'    ],
+             [ qr/Type :/,            'genre'       ],
+             [ qr/Descriptif :/,      'description' ],
+             [ qr/Editeur :/,         'editor'      ],
+             [ qr/D.*?veloppeur :/,   'developer'   ],
+             [ qr/Multijoueurs :/,    'players'     ],
+         );
+         for my $entry (@labels)
+         {
+             my ($pattern, $field) = @{$entry};
+             $self->{is} = $field if $origtext =~ $pattern;
+         }
+         $self->{curInfo}->{exclusive} = 0 if $origtext =~ /Existe aussi sur :/;
+         return;
+     }
+
+     return unless $self->{is};
+
+     $origtext =~ s/^\s*//;
+     $origtext =~ s/\n$//;
+     return unless $origtext;
+
+     if ($self->{is} eq 'players')
+     {
+         # "-" and "non" mean single player, "oui" multiplayer.
+         $origtext =~ s/-/1/;
+         $origtext =~ s/non/1/i;
+         $origtext =~ s/oui/Multijoueurs/i;
+     }
+     if ($self->{is} eq 'ratingpress')
+     {
+         # The site rates out of 20; the stored rating is half of it.
+         $origtext =~ m|(\d*)/20|;
+         $origtext = int($1 / 2);
+     }
+     $self->{curInfo}->{$self->{is}} = $origtext;
+     $self->{is} = '';
+ }
+
+ # URL of the tips page as stored by start(); empty when none was found.
+ sub getTipsUrl
+ {
+     my ($self) = @_;
+
+     return $self->{urlTips};
+ }
+
+ # Constructor: builds the object through the parent class, declares which
+ # fields the result list provides and resets the tips state.
+ sub new
+ {
+     my ($proto) = @_;
+     my $class = ref($proto) ? ref($proto) : $proto;
+     my $self = bless($class->SUPER::new(), $class);
+
+     # Fields available for each entry of the search-result list.
+     my %listFields = (name => 1, platform => 1);
+     $self->{hasField} = \%listFields;
+
+     $self->{isTip} = 0;
+     $self->{urlTips} = "";
+
+     return $self;
+ }
+
+ # Prepares the raw page for the parser and returns the (possibly rewritten)
+ # HTML. List and item pages are returned unchanged; only the parser state is
+ # reset for them. Tips pages are rewritten so that start() and text() can
+ # follow them using the tpf* marker tags. The substitutions run in the order
+ # written.
+ sub preProcess
+ {
+ my ($self, $html) = @_;
+ if ($self->{parsingList})
+ {
+ # New result list: reset the list-parsing state.
+ $self->{parsingEnded} = 0;
+ $self->{inResults} = 0;
+ $self->{isGame} = 0;
+ }
+ elsif ($self->{parsingTips})
+ {
+ # Replace the links to the tips and walkthrough pages by the content
+ # of those pages (downloaded by RecupTips / RecupSolution).
+ $html =~ s|<h4 class="lien_base"><a href="(.+)">Les astuces d|$self->RecupTips($1)|ge;
+ $html =~ s|<h4 class="lien_base"><a href="(.+)">La solution d|$self->RecupSolution($1)|ge;
+ # Remove the opening of linked h5 headings.
+ $html =~ s|<h5><a href="(.+)">||gi;
+ # Marker tags: tpfstopsolution / tpffintpf tell start() to stop
+ # collecting, tpfdebuttpf to start again.
+ $html =~ s|<h3 class="titre_bloc"><span>Plus d'infos</span></h3>|<tpfstopsolution>|gi;
+ $html =~ s|<div id="boxes_v">|<tpffintpf>|gi;
+ $html =~ s|<p class="lien_base">|<tpffintpf>|gi;
+ $html =~ s|<div class="player_article">|<tpffintpf>|gi;
+ $html =~ s|</object>|<tpfdebuttpf>|gi;
+ $html =~ s|<p class="title_bar">|<tpffintpf>|gi;
+ $html =~ s|<div class="bloc3" id="astuces_ajout"><h3 class="titre_bloc">|<tpffintpf>|gi;
+ # Line breaks and <kbd> become paragraph starts; inline markup that
+ # would split a text node is removed.
+ $html =~ s|<br />|<p>|gi;
+ $html =~ s|<kbd>|<p>|gi;
+ $html =~ s|</kbd>||gi;
+ $html =~ s|<b>||gi;
+ $html =~ s|</b>||gi;
+ $html =~ s|<span>||gi;
+ # PlayStation button images are replaced by the button name.
+ $html =~ s|<img src="../pics/psx/cercle.gif"\s*(alt="CERCLE")?\s*/>|Cercle|gi;
+ $html =~ s|<img src="../pics/psx/croix.gif"\s*(alt="CROIX")?\s*/>|Croix|gi;
+ $html =~ s|<img src="../pics/psx/carre.gif"\s*(alt="CARRE")?\s*/>|Carr.|gi;
+ $html =~ s|<img src="../pics/psx/triangle.gif"\s*(alt="TRIANGLE")?\s*/>|Triangle|gi;
+ $html =~ s|<img src="http://image.jeuxvideo.com/pics/btajv/psx/cercle.gif"\s*(alt="CERCLE")?\s*/>|Cercle|gi;
+ $html =~ s|<img src="http://image.jeuxvideo.com/pics/btajv/psx/croix.gif"\s*(alt="CROIX")?\s*/>|Croix|gi;
+ $html =~ s|<img src="http://image.jeuxvideo.com/pics/btajv/psx/carre.gif"\s*(alt="CARRE")?\s*/>|Carr.|gi;
+ $html =~ s|<img src="http://image.jeuxvideo.com/pics/btajv/psx/triangle.gif"\s*(alt="TRIANGLE")?\s*/>|Triangle|gi;
+ # Replace typographic characters (curly apostrophe, bullet, oe
+ # ligatures, ellipsis) by plain ASCII equivalents.
+ # NOTE(review): the literal patterns below look like mis-decoded
+ # multi-byte sequences - confirm against the page encoding.
+ $html =~ s|\x{92}|'|gi;
+ $html =~ s|’|'|gi;
+ $html =~ s|•|*|gi;
+ $html =~ s|œ|oe|gi;
+ $html =~ s|…|...|gi;
+ $html =~ s|\x{85}|...|gi;
+ $html =~ s|\x{8C}|OE|gi;
+ $html =~ s|\x{9C}|oe|gi;
+ }
+ else
+ {
+ # Item page: reset the item-parsing state.
+ $self->{is} = '';
+ $self->{inScreenshots} = 0;
+ }
+ return $html;
+ }
+
+ # Downloads a tips page and returns its tip block wrapped in
+ # <tpfdebuttpf> ... <tpffintpf>, with the following pages (reached through
+ # the "astuces_suiv" link) appended recursively inside the wrapper.
+ #   $url - URL of the tips page
+ # Returns an empty wrapper when the page has no tip block.
+ # A missing end marker or closing quote no longer chops the last character
+ # off the extracted text, and a next-page link pointing at the page itself
+ # is not followed.
+ sub RecupTips
+ {
+     my ($self, $url) = @_;
+
+     my $html = $self->loadPage($url);
+
+     # Link to the next tips page, if any.
+     my $nextUrl = '';
+     my $linkStart = '<p class="astuces_suiv"> <a href="';
+     my $found = index($html, $linkStart);
+     if ( $found >= 0 )
+     {
+         my $rest = substr($html, $found + length($linkStart));
+         my $quote = index($rest, "\"");
+         $nextUrl = substr($rest, 0, $quote) if $quote >= 0;
+     }
+
+     my $blockStart = '<div id="astuce_detail" class="astuce">';
+     $found = index($html, $blockStart);
+     if ( $found >= 0 )
+     {
+         $html = substr($html, $found + length($blockStart));
+         my $blockEnd = index($html, "<div id=\"barre_outils_v2\">");
+         $html = substr($html, 0, $blockEnd) if $blockEnd >= 0;
+         if ( ($nextUrl ne "") && ($nextUrl ne $url) )
+         {
+             $html .= $self->RecupTips($nextUrl);
+         }
+     }
+     else
+     {
+         $html = '';
+     }
+     return "<tpfdebuttpf>" . $html . "<tpffintpf>";
+ }
+
+ # Downloads a walkthrough page and returns its "soluce" block wrapped in
+ # <tpfdebuttpf> ... <tpffintpf>, with the following pages (reached through
+ # the "astuces_suiv" link) appended recursively inside the wrapper.
+ #   $url - URL of the walkthrough page
+ # Returns an empty wrapper when the page has no walkthrough block.
+ # A missing end marker or closing quote no longer chops the last character
+ # off the extracted text, and a next-page link pointing at the page itself
+ # is not followed.
+ sub RecupSolution
+ {
+     my ($self, $url) = @_;
+
+     my $html = $self->loadPage($url);
+
+     # Link to the next walkthrough page, if any.
+     my $nextUrl = '';
+     my $linkStart = '<p class="astuces_suiv"><a href="';
+     my $found = index($html, $linkStart);
+     if ( $found >= 0 )
+     {
+         my $rest = substr($html, $found + length($linkStart));
+         my $quote = index($rest, "\"");
+         $nextUrl = substr($rest, 0, $quote) if $quote >= 0;
+     }
+
+     my $blockStart = '<div id="astuce_detail" class="soluce">';
+     $found = index($html, $blockStart);
+     if ( $found >= 0 )
+     {
+         $html = substr($html, $found + length($blockStart));
+         my $blockEnd = index($html, "<div id=\"barre_outils_v2\">");
+         $html = substr($html, 0, $blockEnd) if $blockEnd >= 0;
+         if ( ($nextUrl ne "") && ($nextUrl ne $url) )
+         {
+             $html .= $self->RecupSolution($nextUrl);
+         }
+     }
+     else
+     {
+         $html = '';
+     }
+     return "<tpfdebuttpf>" . $html . "<tpffintpf>";
+ }
+
+ # Builds the search URL for $word; '+' characters in the word are turned
+ # into spaces first.
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     $word =~ tr/+/ /;
+     return join('', 'http://www.jeuxvideo.com/recherche/jeux/', $word, '.htm');
+ }
+
+ # Returns $url unchanged, or the site root when no URL is given.
+ sub getItemUrl
+ {
+     my ($self, $url) = @_;
+
+     return $url ? $url : 'http://www.jeuxvideo.com/';
+ }
+
+ # Name of this plugin.
+ sub getName
+ {
+     my $pluginName = 'jeuxvideo.com';
+     return $pluginName;
+ }
+
+ # Authors credited for this plugin.
+ sub getAuthor
+ {
+     my $authors = 'Tian & TPF';
+     return $authors;
+ }
+
+ # Language code of the site's content.
+ sub getLang
+ {
+     my $language = 'FR';
+     return $language;
+ }
+
+ # Character set name reported for pages of this site.
+ sub getCharset
+ {
+     my $charset = "ISO-8859-1";
+     return $charset;
+ }
+ # Always returns 1 (presumably marks the plugin as a preferred source -
+ # confirm against the caller).
+ sub isPreferred
+ {
+     my $preferred = 1;
+     return $preferred;
+ }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCgames/GCJeuxVideoFr.pm b/lib/gcstar/GCPlugins/GCgames/GCJeuxVideoFr.pm new file mode 100644 index 0000000..bec266f --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCJeuxVideoFr.pm @@ -0,0 +1,425 @@ +package GCPlugins::GCgames::GCJeuxVideoFr;
+
+###################################################
+#
+# Copyright 2005-2011 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCgames::GCgamesCommon;
+
+{
+ package GCPlugins::GCgames::GCPluginJeuxVideoFr;
+
+ use base 'GCPlugins::GCgames::GCgamesPluginsBase';
+
+ # Start-tag callback (signature follows HTML::Parser's start handler).
+ #   - parsingList: a div.jeuDesc opens a result; its link gives URL and
+ #                  name, span.bleu2 / span.bleu6 announce platform and
+ #                  genre, p.jeuNote closes the result
+ #   - parsingTips: nothing to do
+ #   - otherwise:   item page: name, screenshots (downloaded from the
+ #                  gallery pages), info list state, rating, description,
+ #                  box picture
+ # Fixes over the previous version: conditions use logical && instead of
+ # bitwise &, and a screenshot field is only written once a complete image
+ # URL has been extracted (it used to receive leftover HTML or the gallery
+ # page URL when a marker was missing).
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+     if ($self->{parsingList})
+     {
+         if ($tagname eq 'div')
+         {
+             $self->{isGame} = 1
+                 if $attr->{class} eq "jeuDesc";
+         }
+         if ($self->{isGame})
+         {
+             if ($tagname eq 'a')
+             {
+                 $self->{itemIdx}++;
+                 $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+                 $self->{itemsList}[$self->{itemIdx}]->{name} = $attr->{title};
+             }
+             elsif (($tagname eq 'span') && ($attr->{class} eq "bleu2"))
+             {
+                 $self->{isPlatform} = 1;
+             }
+             elsif (($tagname eq 'span') && ($attr->{class} eq "bleu6"))
+             {
+                 $self->{isGenre} = 1;
+             }
+             elsif (($tagname eq 'p') && ($attr->{class} eq "jeuNote"))
+             {
+                 $self->{isGame} = 0;
+                 $self->{isEnd} = 1;
+             }
+         }
+     }
+     elsif ($self->{parsingTips})
+     {
+     }
+     else
+     {
+         if (($tagname eq 'input') && ($attr->{id} eq 'titreJeu'))
+         {
+             $self->{curInfo}->{name} = $attr->{value};
+             $self->{curInfo}->{platform} = $self->{url_plateforme};
+
+             # Part of $text after the first $marker, or undef if absent.
+             my $after = sub {
+                 my ($text, $marker) = @_;
+                 my $pos = index($text, $marker);
+                 return if $pos < 0;
+                 return substr($text, $pos + length($marker));
+             };
+             # Part of $text before the first double quote, or undef.
+             my $untilQuote = sub {
+                 my ($text) = @_;
+                 return if !defined $text;
+                 my $pos = index($text, "\"");
+                 return if $pos < 0;
+                 return substr($text, 0, $pos);
+             };
+             # Target of the first link inside the slideshow block, or undef.
+             my $slideshowImage = sub {
+                 my ($page) = @_;
+                 my $block = $after->($page, 'div class="image_slideshow">');
+                 return if !defined $block;
+                 my $link = $after->($block, '<a href="');
+                 return scalar $untilQuote->($link);
+             };
+
+             my $html = $self->loadPage( $self->{url_screenshot} );
+             my $first = $slideshowImage->($html);
+             if (defined $first)
+             {
+                 $self->{curInfo}->{screenshot1} = $first;
+
+                 # The link after the image counter leads to the next
+                 # gallery page, which holds the second screenshot.
+                 my $counter = $after->($html, '"imageNumberTotal"');
+                 my $nextLink = defined $counter ? $after->($counter, 'href="') : undef;
+                 my $nextPage = $untilQuote->($nextLink);
+                 if (defined $nextPage)
+                 {
+                     my $nextHtml = $self->loadPage('http://www.jeuxvideo.fr/' . $nextPage);
+                     my $second = $slideshowImage->($nextHtml);
+                     $self->{curInfo}->{screenshot2} = $second if defined $second;
+                 }
+             }
+         }
+         elsif (($tagname eq 'div') && ($attr->{class} eq 'clearer spacer10'))
+         {
+             # End of the info list.
+             $self->{isInfo} = 0;
+             $self->{is} = '';
+         }
+         elsif (($tagname eq 'div') && ($attr->{class} eq 'listing_apropos'))
+         {
+             # Info list: isInfo goes 1 (in list) -> 2 (label) -> 3 (value).
+             $self->{isInfo} = 1;
+         }
+         elsif (($self->{isInfo} eq '1') && ($tagname eq 'span') && ($attr->{class} eq 'strong'))
+         {
+             $self->{isInfo} = 2;
+         }
+         elsif (($self->{is}) && ($tagname eq 'span') && ($attr->{class} eq 'noir'))
+         {
+             $self->{isInfo} = 3;
+         }
+         elsif (($self->{is}) && ($tagname eq 'div') && ($attr->{class} eq 'clearer'))
+         {
+             # End of one label/value pair.
+             $self->{isInfo} = 1;
+             $self->{is} = '';
+         }
+         elsif (($tagname eq 'span') && ($attr->{class} eq 'note-jeux orange1') && ($attr->{property} eq 'v:rating'))
+         {
+             $self->{isNote} = 1;
+         }
+         elsif (($tagname eq 'div') && ($attr->{class} eq 'contentCommentaire'))
+         {
+             $self->{isDesc} = 1;
+         }
+         elsif (($self->{isDesc}) && ($tagname eq 'div') && ($attr->{class} eq 'clearer'))
+         {
+             $self->{isDesc} = 0;
+         }
+         elsif (($tagname eq 'img') && ($attr->{class} eq 'imgJeu') && !($attr->{src} =~ /blank/i))
+         {
+             $self->{curInfo}->{boxpic} = $attr->{src};
+         }
+     }
+ }
+
+ # Closing-tag callback. Whatever the parsing mode (list, tips or item
+ # page), the only work done here is decrementing the nesting counter of
+ # the tag that has just been closed.
+ sub end
+ {
+     my $self    = shift;
+     my $tagname = shift;
+
+     $self->{inside}->{$tagname}--;
+ }
+
+ # Text callback: receives each run of text found between two tags and
+ # stores it according to the flags raised beforehand by the tag handler.
+ #
+ # Parameters:
+ #   $self     - plugin instance (flags, itemsList and curInfo live in it)
+ #   $origtext - raw text; it is cleaned on a local copy before storage
+ #
+ # In list mode the text fills the current search result (genre, platform)
+ # and, on the end-of-entry marker, the entry is duplicated once per
+ # platform. In item mode it fills $self->{curInfo}. Tips mode is a no-op.
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     if ($self->{parsingList})
+     {
+         # Strip leading and trailing whitespace
+         $origtext =~ s/^\s+//;
+         $origtext =~ s/\s+$//;
+
+         if ($self->{isGenre})
+         {
+             # Successive genres of the same entry are joined with ' - '
+             if ($self->{itemsList}[$self->{itemIdx}]->{genre} eq '')
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{genre} = $origtext;
+             }
+             else
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{genre} .= ' - ' . $origtext;
+             }
+             $self->{isGenre} = 0;
+         }
+         elsif ($self->{isPlatform})
+         {
+             # Drop the '|' separators; platforms are accumulated as a
+             # comma separated string that is split again at entry end
+             $origtext =~ s/\|//gi;
+             if ($self->{itemsList}[$self->{itemIdx}]->{platform} eq '')
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{platform} = $origtext;
+             }
+             else
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{platform} .= ', ' . $origtext;
+             }
+             $self->{isPlatform} = 0;
+         }
+         elsif ($self->{isEnd})
+         {
+             # End of an entry: replace it by one entry per platform. The
+             # platform is appended to the URL behind the 'tpfplatformtpf'
+             # marker so that getItemUrl can recover it later.
+             # NOTE(review): when no platform was collected the entry slot
+             # is released (itemIdx decremented) and nothing replaces it -
+             # confirm this is intended.
+             my @platforms = split(/,/, $self->{itemsList}[$self->{itemIdx}]->{platform});
+
+             my $SaveName = $self->{itemsList}[$self->{itemIdx}]->{name};
+             my $SaveUrl = $self->{itemsList}[$self->{itemIdx}]->{url};
+             my $SaveGenre = $self->{itemsList}[$self->{itemIdx}]->{genre};
+             $self->{itemIdx}--;
+
+             foreach my $element (@platforms)
+             {
+                 # Strip leading and trailing whitespace
+                 $element =~ s/^\s+//;
+                 $element =~ s/\s+$//;
+
+                 $self->{itemIdx}++;
+                 $self->{itemsList}[$self->{itemIdx}]->{name} = $SaveName;
+                 $self->{itemsList}[$self->{itemIdx}]->{url} = $SaveUrl . 'tpfplatformtpf' . $element;
+                 $self->{itemsList}[$self->{itemIdx}]->{platform} = $element;
+                 $self->{itemsList}[$self->{itemIdx}]->{genre} = $SaveGenre;
+             }
+             $self->{isEnd} = 0;
+         }
+     }
+     elsif ($self->{parsingTips})
+     {
+     }
+     else
+     {
+         if ($self->{isInfo} eq 2)
+         {
+             # The text is a label: remember which field its value fills
+             $self->{is} = 'genre' if $origtext =~ /Genre :/;
+             $self->{is} = 'editor' if $origtext =~ /Editeur :/;
+             $self->{is} = 'developer' if $origtext =~ /D.veloppeur :/;
+             $self->{is} = 'players' if $origtext =~ /Nb joueurs :/;
+             $self->{is} = 'released' if $origtext =~ /Sortie :/;
+             $self->{is} = 'exclusive' if $origtext =~ /Plateformes :/;
+         }
+         elsif ($self->{isInfo} eq 3)
+         {
+             # Remove the '|' that separates the values, then trim
+             $origtext =~ s/\|//gi;
+             $origtext =~ s/^\s+//;
+             $origtext =~ s/\s+$//;
+             if ($origtext)
+             {
+                 my $field = $self->{is};
+                 if ($field eq 'players')
+                 {
+                     $origtext =~ s/Exclusivement Solo/1/i;
+                     $origtext =~ s/\s*joueurs?//i;
+                 }
+
+                 my $isFirstValue = ($self->{curInfo}->{$field} eq '');
+                 if ($field eq 'exclusive')
+                 {
+                     # One platform listed => exclusive, more => not
+                     $self->{curInfo}->{exclusive} = $isFirstValue ? 'true' : 'false';
+
+                     # The platform name is matched literally (\Q..\E): it
+                     # is data, not a pattern, and may contain regex
+                     # metacharacters. An empty name is handled explicitly
+                     # because an empty pattern would make Perl re-use the
+                     # last successful regex.
+                     my $platform = $self->{curInfo}->{platform};
+                     $platform = '' unless defined $platform;
+                     if (($platform eq '') || ($origtext =~ /\Q$platform\E/i))
+                     {
+                         $self->{curInfo}->{platform} = $origtext;
+                     }
+                 }
+                 elsif ($isFirstValue)
+                 {
+                     $self->{curInfo}->{$field} = $origtext;
+                 }
+                 else
+                 {
+                     $self->{curInfo}->{$field} .= ', ' . $origtext;
+                 }
+             }
+         }
+         elsif ($self->{isNote} eq 1)
+         {
+             $self->{curInfo}->{ratingpress} = $origtext;
+             $self->{isNote} = 0;
+         }
+         elsif ($self->{isDesc} eq 1)
+         {
+             # Strip leading and trailing whitespace
+             $origtext =~ s/^\s+//;
+             $origtext =~ s/\s+$//;
+             $self->{curInfo}->{description} .= $origtext;
+         }
+     }
+ }
+
+ # Returns the URL stored in the url_tips slot of the instance (empty
+ # until getItemUrl has been called for an item).
+ sub getTipsUrl
+ {
+     my ($self) = @_;
+
+     return $self->{url_tips};
+ }
+
+ # Constructor: builds the parent object, re-blesses it into the requested
+ # class and resets every parsing flag and URL slot used by this plugin.
+ sub new
+ {
+     my $proto = shift;
+     my $class = ref($proto) || $proto;
+     my $self = bless($class->SUPER::new(), $class);
+
+     # Columns available in the search results
+     $self->{hasField} = {
+         name => 1,
+         platform => 1,
+         released => 0,
+         genre => 1
+     };
+
+     # State flags toggled by the tag and text handlers
+     foreach my $flag (qw(isGame isPlatform isGenre isEnd isInfo isNote isDesc isTip))
+     {
+         $self->{$flag} = 0;
+     }
+
+     # Values derived from the item URL, plus the name of the field being read
+     foreach my $slot (qw(url_plateforme url_screenshot url_tips is))
+     {
+         $self->{$slot} = '';
+     }
+
+     return $self;
+ }
+
+ # Cleans a page before it is parsed. Search-result and tips pages are
+ # returned untouched; on item pages '<br />' becomes a newline and the
+ # bold markers are removed.
+ sub preProcess
+ {
+     my ($self, $html) = @_;
+
+     return $html if $self->{parsingList};
+     return $html if $self->{parsingTips};
+
+     $html =~ s|<br />|\n|gi;
+     for my $boldTag ('<b>', '</b>')
+     {
+         $html =~ s|$boldTag||gi;
+     }
+
+     return $html;
+ }
+
+ # Builds the search URL for the given word, which is inserted as-is
+ # between the fixed prefix and the trailing slash.
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     return join('', 'http://www.jeuxvideo.fr/r/', $word, '/');
+ }
+
+ # Turns a search-result URL into the absolute item URL.
+ #
+ # Whatever follows the 'tpfplatformtpf' marker is stored in
+ # $self->{url_plateforme} and removed from the URL. The screenshot and
+ # tips URLs of the item are stored in the instance as a side effect.
+ sub getItemUrl
+ {
+     my ($self, $url) = @_;
+
+     my $site = 'http://www.jeuxvideo.fr';
+     my $marker = 'tpfplatformtpf';
+
+     my $markerAt = index($url, $marker);
+     if ($markerAt >= 0)
+     {
+         $self->{url_plateforme} = substr($url, $markerAt + length($marker));
+         $url = substr($url, 0, $markerAt);
+     }
+
+     my $itemUrl = $site . $url;
+     $self->{url_screenshot} = $itemUrl . 'image-photo/';
+     $self->{url_tips} = $itemUrl . 'astuce-code/';
+
+     return $itemUrl;
+ }
+
+ # Name of this plugin.
+ sub getName
+ {
+     my $pluginName = 'jeuxvideo.fr';
+
+     return $pluginName;
+ }
+
+ # Author of this plugin.
+ sub getAuthor
+ {
+     my $author = 'Tian';
+
+     return $author;
+ }
+
+ # Language code reported by this plugin.
+ sub getLang
+ {
+     my $language = 'FR';
+
+     return $language;
+ }
+
+ # Character set reported by this plugin for the pages it reads.
+ sub getCharset
+ {
+     my ($self) = @_;
+
+     return 'ISO-8859-1';
+ }
+
+ # Always returns 1, flagging this plugin as a preferred one.
+ sub isPreferred
+ {
+     my $preferred = 1;
+
+     return $preferred;
+ }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCgames/GCLudus.pm b/lib/gcstar/GCPlugins/GCgames/GCLudus.pm new file mode 100644 index 0000000..42b42a7 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCLudus.pm @@ -0,0 +1,367 @@ +package GCPlugins::GCgames::GCLudus;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCgames::GCgamesCommon;
+
+{
+ package GCPlugins::GCgames::GCPluginLudus;
+
+ use base 'GCPlugins::GCgames::GCgamesPluginsBase';
+
+ # Opening-tag callback. In list mode it walks the result rows (link,
+ # then the 'trat2' cells); in item mode it raises the flags telling the
+ # text callback what the next text means and collects picture URLs.
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+     if ($self->{parsingList})
+     {
+         if (($tagname eq 'span') && ($attr->{class} eq 'titolini'))
+         {
+             $self->{isGame} = 1;
+         }
+         elsif (($tagname eq 'img') && ($self->{isGame}))
+         {
+             $self->{isGame} = 0;
+             $self->{isInfo} = 0;
+         }
+         elsif (($tagname eq 'a') && ($self->{isGame}))
+         {
+             $self->{itemIdx}++;
+             $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.ludus.it/" . $attr->{href};
+             $self->{isName} = 1;
+             $self->{isInfo} = 1;
+         }
+         elsif (($tagname eq 'td') && ($attr->{class} eq 'trat2') && ($self->{isInfo} eq '1'))
+         {
+             # The second field is the data type
+             $self->{isInfo} = 2;
+         }
+         elsif (($tagname eq 'td') && ($attr->{class} eq 'trat2') && ($self->{isInfo} eq '2'))
+         {
+             # The third field is the platform
+             $self->{isPlatform} = 1;
+             $self->{isInfo} = 3;
+         }
+         elsif (($tagname eq 'td') && ($attr->{class} eq 'trat2') && ($self->{isInfo} eq '3'))
+         {
+             # The fourth field is the release date
+             $self->{isDate} = 1;
+             $self->{isInfo} = 0;
+         }
+     }
+     elsif ($self->{parsingTips})
+     {
+     }
+     else
+     {
+         # Absolute URL of the full-size picture: the first '//' becomes
+         # the site root and '/immagini_grandi' is inserted before the
+         # file name (the part after the last '/').
+         my $bigPicture = sub {
+             my ($src) = @_;
+             $src =~ s|//|http://www.ludus.it/|;
+             my $lastSlash = rindex($src, '/');
+             if ($lastSlash >= 0)
+             {
+                 $src = substr($src, 0, $lastSlash) . "/immagini_grandi/" . substr($src, $lastSlash + 1);
+             }
+             return $src;
+         };
+
+         if (($tagname eq 'td') && ($attr->{colspan} eq '2') && ($attr->{class} eq 'titoli2'))
+         {
+             $self->{isName} = 1;
+         }
+         elsif (($tagname eq 'span') && ($attr->{class} eq 'testo4'))
+         {
+             $self->{isAnalyse} = 1;
+         }
+         elsif ($self->{isPlatform} eq 1)
+         {
+             $self->{isPlatform} = 2;
+         }
+         elsif ($self->{isEditor} eq 1)
+         {
+             $self->{isEditor} = 2;
+         }
+         elsif ($self->{isDeveloper} eq 1)
+         {
+             $self->{isDeveloper} = 2;
+         }
+         elsif ($self->{isGenre} eq 1)
+         {
+             $self->{isGenre} = 2;
+         }
+         elsif (($tagname eq 'span') && ($attr->{class} eq 'testo') && ($attr->{align} eq ''))
+         {
+             $self->{isDescription} = 1;
+         }
+         elsif (($tagname eq 'tpfsautdeligne') && ($self->{isDescription}))
+         {
+             # Pseudo tag produced by preProcess for every line break
+             $self->{curInfo}->{description} .= "\n";
+         }
+         elsif (($tagname eq 'table') && ($self->{isDescription}))
+         {
+             $self->{isDescription} = 0;
+         }
+         elsif (($tagname eq 'a') && (index($attr->{onclick}, "adafl=win") >= 0) && ($self->{curInfo}->{screenshot1} eq '') && ($self->{curInfo}->{screenshot2} eq ''))
+         {
+             $self->{isScreen} = 1;
+         }
+         elsif (($tagname eq 'img') && ($self->{isScreen}))
+         {
+             if ($self->{curInfo}->{screenshot1} eq '')
+             {
+                 $self->{curInfo}->{screenshot1} = $bigPicture->($attr->{src});
+             }
+             elsif ($self->{curInfo}->{screenshot2} eq '')
+             {
+                 $self->{curInfo}->{screenshot2} = $bigPicture->($attr->{src});
+                 $self->{isScreen} = 0;
+             }
+         }
+         elsif (($tagname eq 'a') && (index($attr->{href}, "post_form") >= 0))
+         {
+             # The cover file is named after the last path segment of the link
+             $self->{curInfo}->{boxpic} = $attr->{href};
+             $self->{curInfo}->{boxpic} =~ s|/LINGUA/IT||;
+             my $lastSlash = rindex($self->{curInfo}->{boxpic}, '/');
+             if ($lastSlash >= 0)
+             {
+                 my $gameId = substr($self->{curInfo}->{boxpic}, $lastSlash + 1);
+                 $self->{curInfo}->{boxpic} = "http://www.ludus.it/copertine/giochi/" . $gameId . ".jpg";
+             }
+         }
+     }
+ }
+
+ # Closing-tag callback: only decrements the nesting counter of the tag.
+ sub end
+ {
+     my ($self, $tagname) = @_;
+
+     $self->{inside}->{$tagname}--;
+ }
+
+ # Text callback: stores the text in the current search result (list
+ # mode) or in curInfo (item mode) according to the flags raised by start.
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     if ($self->{parsingList})
+     {
+         # Strip leading and trailing whitespace
+         $origtext =~ s/^\s+//;
+         $origtext =~ s/\s+$//;
+         if ($self->{isName})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{name} = $origtext;
+             $self->{isName} = 0;
+         }
+         elsif ($self->{isDate})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{released} = $origtext;
+             $self->{isDate} = 0;
+         }
+         elsif ($self->{isPlatform})
+         {
+             $origtext =~ s/Pc/PC/;
+             $self->{itemsList}[$self->{itemIdx}]->{platform} = $origtext;
+             $self->{isPlatform} = 0;
+         }
+     }
+     elsif ($self->{parsingTips})
+     {
+     }
+     else
+     {
+         # Strip leading whitespace
+         $origtext =~ s/^\s+//;
+         if ($self->{isName})
+         {
+             # Keep only what precedes the last '-' of the title
+             my $lastDash = rindex($origtext, '-');
+             if ($lastDash >= 0)
+             {
+                 $origtext = substr($origtext, 0, $lastDash);
+             }
+             $self->{curInfo}->{name} = $origtext;
+             $self->{curInfo}->{released} = $self->{itemsList}[$self->{wantedIdx}]->{released};
+             $self->{isName} = 0;
+         }
+         elsif ($self->{isAnalyse})
+         {
+             # The text is a label announcing the next value
+             $self->{isPlatform} = 1 if ($origtext =~ m/Piattaforma:/i);
+             $self->{isEditor} = 1 if ($origtext =~ m/Software House:/i);
+             $self->{isDeveloper} = 1 if ($origtext =~ m/Sviluppatore:/i);
+             $self->{isGenre} = 1 if ($origtext =~ m/Genere:/i);
+
+             $self->{isAnalyse} = 0;
+         }
+         elsif ($self->{isGenre} eq 2)
+         {
+             # Genres are '/' separated; each one is stored followed by ','
+             for my $genre (split(/\//, $origtext))
+             {
+                 # Strip leading whitespace
+                 $genre =~ s/^\s+//;
+                 $self->{curInfo}->{genre} .= $genre . ",";
+             }
+             $self->{isGenre} = 0;
+         }
+         elsif ($self->{isDeveloper} eq 2)
+         {
+             $self->{curInfo}->{developer} = $origtext;
+             $self->{isDeveloper} = 0;
+         }
+         elsif ($self->{isEditor} eq 2)
+         {
+             $self->{curInfo}->{editor} = $origtext;
+             $self->{isEditor} = 0;
+         }
+         elsif ($self->{isPlatform} eq 2)
+         {
+             $origtext =~ s/Pc/PC/;
+             $self->{curInfo}->{platform} = $origtext;
+             $self->{isPlatform} = 0;
+         }
+         elsif ($self->{isDescription})
+         {
+             $self->{curInfo}->{description} .= $origtext;
+         }
+     }
+ }
+
+ # This plugin provides no tips page: returns nothing.
+ sub getTipsUrl
+ {
+     my $self = shift;
+
+     return;
+ }
+
+ # Constructor: builds the parent object, re-blesses it and resets every
+ # parsing flag used by this plugin.
+ sub new
+ {
+     my $proto = shift;
+     my $class = ref($proto) || $proto;
+     my $self = bless($class->SUPER::new(), $class);
+
+     $self->{hasField} = {
+         name => 1,
+         platform => 1,
+         genre => 0,
+         released => 1
+     };
+
+     foreach my $flag (qw(isInfo isName isGame isPlatform isAnalyse isEditor isDeveloper isDate isGenre isScreen))
+     {
+         $self->{$flag} = 0;
+     }
+
+     return $self;
+ }
+
+ # Simplifies an item page before parsing (list pages are untouched):
+ # inline formatting is removed, line breaks become the pseudo tag
+ # <tpfsautdeligne> handled by start, some characters are normalised.
+ sub preProcess
+ {
+     my ($self, $html) = @_;
+
+     return $html if $self->{parsingList};
+
+     $html =~ s|<u>||gi;
+     $html =~ s|<li>|\n* |gi;
+     $html =~ s|<br>|<tpfsautdeligne>|gi;
+     $html =~ s|<br />|<tpfsautdeligne>|gi;
+     $html =~ s|<b>||gi;
+     $html =~ s|</b>||gi;
+     $html =~ s|<i>||gi;
+     $html =~ s|</i>||gi;
+     $html =~ s|<p>|\n|gi;
+     $html =~ s|</p>||gi;
+     $html =~ s|\x{92}|'|gi;
+     $html =~ s|’|'|gi;
+     $html =~ s|•|*|gi;
+
+     return $html;
+ }
+
+ # Returns the search URL and the form fields to send with it; '+' in the
+ # searched word is turned back into a space first.
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     $word =~ s/\+/ /g;
+     my @formFields = ("categoria" => "2", "SEARCH_STRING" => "$word");
+     return ('http://www.ludus.it/code/lista_alfabetica_giochi/LINGUA/IT', [@formFields]);
+ }
+
+ # Result URLs are already absolute (see start): returned unchanged.
+ sub getItemUrl
+ {
+     my ($self, $url) = @_;
+
+     return $url;
+ }
+
+ # Name of this plugin.
+ sub getName
+ {
+     return 'Ludus';
+ }
+
+ # Author of this plugin.
+ sub getAuthor
+ {
+     return 'TPF';
+ }
+
+ # Language code reported by this plugin.
+ sub getLang
+ {
+     return 'IT';
+ }
+
+ # Character set reported by this plugin for the pages it reads.
+ sub getCharset
+ {
+     my $self = shift;
+
+     return "ISO-8859-1";
+ }
+
+ # Fields that can be used as search criteria.
+ sub getSearchFieldsArray
+ {
+     return ['name'];
+ }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCgames/GCMobyGames.pm b/lib/gcstar/GCPlugins/GCgames/GCMobyGames.pm new file mode 100644 index 0000000..c6650c3 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCMobyGames.pm @@ -0,0 +1,541 @@ +package GCPlugins::GCgames::GCMobyGames;
+
+###################################################
+#
+# Copyright 2005-2011 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCgames::GCgamesCommon;
+
+{
+ package GCPlugins::GCgames::GCPluginMobyGames;
+
+ use base 'GCPlugins::GCgames::GCgamesPluginsBase';
+ use HTML::Entities;
+
+ # Downloads one hint page and extracts its title and its body.
+ #
+ # Parameters:
+ #   $html_ini - URL of the hint page (passed to loadPage)
+ # Returns a list: (title, body) with HTML entities decoded, (title) alone
+ # when the body marker is absent, or an empty list when the title marker
+ # is not found or the page could not be loaded.
+ sub extractTips
+ {
+     my ($self, $html_ini) = @_;
+     my @tmpAnswer = ();
+
+     my $html = $self->loadPage($html_ini, 0, 1);
+     # Nothing to extract from a page that failed to load
+     return @tmpAnswer if !defined $html;
+
+     $html =~ s|<pre>||gi;
+     $html =~ s|</pre>||gi;
+     $html =~ s|<b>||gi;
+     $html =~ s|</b>||gi;
+
+     my $titleMarker = "class=\"sbL sbB sbT\">";
+     my $found = index($html, $titleMarker);
+     if ($found >= 0)
+     {
+         my $title = substr($html, $found + length($titleMarker));
+         # Cut at the end of the cell. When the end marker is missing the
+         # remaining text is kept whole: substr(.., 0, -1) would silently
+         # drop its last character.
+         my $end = index($title, "</td><td align=");
+         $title = substr($title, 0, $end) if $end >= 0;
+
+         $tmpAnswer[0] = decode_entities($title);
+
+         my $bodyMarker = "class=\"sbR sbL sbB\"><p>";
+         $found = index($html, $bodyMarker);
+         if ($found >= 0)
+         {
+             my $body = substr($html, $found + length($bodyMarker));
+             $end = index($body, "</p>");
+             $body = substr($body, 0, $end) if $end >= 0;
+             $body =~ s/<br>/\n/gi;
+             $body =~ s|<p>|\n|gi;
+
+             $tmpAnswer[1] = decode_entities($body);
+         }
+     }
+
+     return @tmpAnswer;
+ }
+
+ # Extracts a "number of players" value from the technical-info page.
+ #
+ # Parameters:
+ #   $html_ini - HTML of the page
+ #   $word     - label that precedes the wanted value
+ # Returns the text of the first link that follows the label (what lies
+ # between the last '>' and '</a>'), with '1 Player' shortened to '1', or
+ # 0 when the label is not present.
+ sub extractPlayer
+ {
+     my ($self, $html_ini, $word) = @_;
+
+     $html_ini = '' unless defined $html_ini;
+     my $found = index($html_ini, $word);
+     return 0 if $found < 0;
+
+     my $html = substr($html_ini, $found + length($word));
+
+     # Keep what precedes the end of the link; if there is no '</a>' the
+     # text is kept whole instead of losing its last character.
+     my $linkEnd = index($html, "</a>");
+     $html = substr($html, 0, $linkEnd) if $linkEnd >= 0;
+
+     # The value is whatever follows the last tag
+     my $lastTagEnd = rindex($html, ">");
+     $html = substr($html, $lastTagEnd + 1) if $lastTagEnd >= 0;
+
+     # Non-breaking spaces (entity or raw character) become plain spaces so
+     # that the next substitution can match.
+     # NOTE(review): the pattern here was displayed as a plain space, which
+     # is a no-op; it is presumably a mangled '&nbsp;' - confirm against
+     # the upstream file.
+     $html =~ s/(?:&nbsp;|\x{A0})/ /g;
+     $html =~ s/1 Player/1/;
+
+     return $html;
+ }
+
+ # Opening-tag callback.
+ #
+ # Parameters: tag name, attribute hash, attribute order and original
+ # text; only the first two are used here.
+ #
+ # - list mode: collects the game links of the search results
+ # - tips mode: follows each hint link and stores its content
+ # - item mode: raises the flags read by the text callback and fetches the
+ #   extra pages (cover art, technical info, screenshots)
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+     if ($self->{parsingList})
+     {
+         if ( !$self->{insideSearchImage}
+             && ($tagname eq 'a')
+             && ( substr($attr->{href},0,6) eq '/game/' ) )
+         {
+             # Test if there is a platform name in it
+             # (i.e. if we can find a second slash after game/ )
+             if ($attr->{href} =~ m|/game/[^/]*/|)
+             {
+                 if ($self->{currentName})
+                 {
+                     # One more platform of the game named previously
+                     $self->{itemIdx}++;
+                     $self->{itemsList}[$self->{itemIdx}]->{url} = 'http://www.mobygames.com'.$attr->{href}.'';
+                     $self->{itemsList}[$self->{itemIdx}]->{name} = $self->{currentName};
+                     $self->{isPlatform} = 1;
+                 }
+                 else
+                 {
+                     # This is a game we want to add
+                     $self->{isGame} = 1;
+                     $self->{itemIdx}++;
+                     $self->{itemsList}[$self->{itemIdx}]->{url} = 'http://www.mobygames.com'.$attr->{href}.'';
+                     $self->{isName} = 1 ;
+                 }
+             }
+             else
+             {
+                 # We will need the name later
+                 $self->{isGameName} = 1;
+             }
+         }
+         elsif ( ($tagname eq 'a') && ( substr($attr->{href},0,7) eq '/search' ) )
+         {
+             $self->{isGame} = 0;
+         }
+         elsif ($tagname eq 'div')
+         {
+             if ($attr->{class} eq 'searchResult')
+             {
+                 # New result: forget the name of the previous game
+                 $self->{currentName} = '';
+             }
+             elsif ($attr->{class} eq 'searchImage')
+             {
+                 $self->{insideSearchImage} = 1;
+             }
+         }
+         elsif ($tagname eq 'em')
+         {
+             $self->{isDate} = 1;
+         }
+     }
+     elsif ($self->{parsingTips})
+     {
+         # isTip / isCode: 2 = section selected, 1 = row found (the next
+         # link is a hint), 3 / 0 = section closed
+         if (($tagname eq 'table') && ($attr->{summary} eq 'List of Tips and Tricks'))
+         {
+             $self->{isSectionTips} = 2;
+         }
+         elsif ( ($tagname eq 'b') && ($self->{isSectionTips} eq '2') )
+         {
+             $self->{isSectionTips} = 1;
+         }
+         elsif ( ($tagname eq 'tr') && (($attr->{class} eq 'mb1') || ($attr->{class} eq 'mb2')) )
+         {
+             $self->{isTip} = 1 if ($self->{isTip} eq 2);
+             $self->{isCode} = 1 if ($self->{isCode} eq 2);
+         }
+         elsif ( ($tagname eq 'a') && ($self->{isTip} eq 1))
+         {
+             my @tips = $self->extractTips('http://www.mobygames.com'.$attr->{href}.'');
+             # An empty list means nothing could be extracted: do not
+             # store a blank entry in that case
+             if (@tips)
+             {
+                 if ($tips[0] =~ m/unlock/i)
+                 {
+                     # Make sure the module called by full name is loaded
+                     require Text::Wrap;
+                     $Text::Wrap::columns = 80;
+                     $tips[1] = Text::Wrap::wrap('', '', $tips[1]);
+                     push @{$self->{curInfo}->{unlockable}}, \@tips;
+                 }
+                 else
+                 {
+                     my $answer = $tips[0];
+                     $answer .= "\n";
+                     $answer .= $tips[1];
+                     if ( ($self->{curInfo}->{secrets}) && ($answer ne "") )
+                     {
+                         $self->{curInfo}->{secrets} .= "\n\n\n"
+                     }
+                     $self->{curInfo}->{secrets} .= $answer;
+                 }
+             }
+             $self->{isTip} = 2;
+         }
+         elsif ( ($tagname eq 'a') && ($self->{isCode} eq 1))
+         {
+             my @tips = $self->extractTips('http://www.mobygames.com'.$attr->{href}.'');
+             if (@tips)
+             {
+                 # Codes are stored as (body, title)
+                 @tips = reverse(@tips);
+                 require Text::Wrap;
+                 $Text::Wrap::columns = 80;
+                 $tips[1] = Text::Wrap::wrap('', '', $tips[1]);
+                 push @{$self->{curInfo}->{code}}, \@tips;
+             }
+
+             $self->{isCode} = 2;
+         }
+         elsif ($tagname eq 'br')
+         {
+             $self->{isTip} = 3;
+             $self->{isCode} = 3;
+             $self->{isSectionTips} = 0;
+         }
+         elsif ($tagname eq 'head')
+         {
+             $self->{isTip} = 0;
+             $self->{isCode} = 0;
+             $self->{isSectionTips} = 0;
+         }
+     }
+     else
+     {
+         if ($tagname eq 'div')
+         {
+             # The id of the block tells which value its first link holds
+             for ($attr->{id})
+             {
+                 /^gameTitle$/ && ($self->{isName} = 1, last);
+                 /^gamePlatform/ && ($self->{isPlatform} = 1, last);
+                 #/^coreGameCover/ && ($self->{isBox} = 1, last);
+                 /^coreGameRelease/ && ($self->{isEditor} = 1, last);
+             }
+
+             if ($attr->{class} =~ m/scoreBoxBig/)
+             {
+                 $self->{isRating} = 1;
+             }
+
+             # Any new block ends the genre list (once one genre has been
+             # read) and the description
+             if ($self->{curInfo}->{genre})
+             {
+                 $self->{isGenre} = 0;
+             }
+
+             $self->{isDescription} = 0;
+         }
+         elsif ( ($tagname eq 'a') && ($self->{isName}) )
+         {
+             $self->{is} = 'name';
+             $self->{curInfo}->{exclusive} = 1;
+             $self->{isName} = 0;
+         }
+         elsif ( ($tagname eq 'a') && ($self->{isPlatform}) )
+         {
+             $self->{is} = 'platform';
+             $self->{isPlatform} = 0;
+         }
+         elsif ( ($tagname eq 'a') && ($self->{isEditor}) )
+         {
+             $self->{is} = 'editor';
+             $self->{isEditor} = 0;
+         }
+         elsif ( ($tagname eq 'a') && ($self->{isDeveloper}) )
+         {
+             $self->{is} = 'developer';
+             $self->{isDeveloper} = 0;
+         }
+         elsif ( ($tagname eq 'a') && ($self->{isDate}) )
+         {
+             $self->{is} = 'released';
+             $self->{isDate} = 0;
+         }
+         elsif ( ($tagname eq 'a') && ($self->{isGenre}) )
+         {
+             $self->{is} = 'genre';
+         }
+         elsif ($tagname eq 'img')
+         {
+             if ($attr->{src} =~ m|covers/small|)
+             {
+                 $attr->{src} =~ s|/small/|/large/|
+                     if $self->{bigPics};
+                 $self->{curInfo}->{boxpic} = $attr->{src};
+                 # From here we try to get back cover
+                 my $covers = $self->loadPage($self->{rootUrl}.'/cover-art', 0, 1);
+                 # $1 is only meaningful after a successful match
+                 if ($covers =~ m|<img alt=".*?Back Cover".*?src="([^"]*)"|)
+                 {
+                     $self->{curInfo}->{backpic} = $1;
+                     $self->{curInfo}->{backpic} =~ s|/small/|/large/|
+                         if $self->{bigPics};
+                 }
+             }
+         }
+         elsif ($tagname eq 'html')
+         {
+             # Number of players, from the technical-info page
+             my $html = $self->loadPage($self->{curInfo}->{$self->{urlField}}.'/techinfo', 0, 1);
+             my $player_offline = $self->extractPlayer($html, "Number of Players: Offline" );
+             my $player_online = $self->extractPlayer($html, "Number of Players: Online" );
+             my $player_total = $self->extractPlayer($html, "Number of Players Supported" );
+
+             if ($player_total)
+             {
+                 $self->{curInfo}->{players} = $player_total;
+             }
+             else
+             {
+                 if ($player_offline)
+                 {
+                     $self->{curInfo}->{players} = 'Offline: '.$player_offline;
+                 }
+                 if ($player_online)
+                 {
+                     if ( $self->{curInfo}->{players} )
+                     {
+                         $self->{curInfo}->{players} .= '; Online: '.$player_online;
+                     }
+                     else
+                     {
+                         $self->{curInfo}->{players} = 'Online: '.$player_online;
+                     }
+                 }
+             }
+
+             # First two screenshots of the screenshots page
+             $html = $self->loadPage($self->{curInfo}->{$self->{urlField}}.'/screenshots', 0, 1);
+             my $screen = 1;
+             while ($html =~ m|src="(/images/shots/[^"]*?)"|g)
+             {
+                 $self->{curInfo}->{'screenshot'.$screen} = 'http://www.mobygames.com' . $1;
+                 $self->{curInfo}->{'screenshot'.$screen} =~ s|/images/shots/s/|/images/shots/l/|
+                     if $self->{bigPics};
+                 $screen++;
+                 last if $screen > 2;
+             }
+         }
+         elsif ( ($tagname eq 'br') && ($self->{isDescription}) )
+         {
+             $self->{curInfo}->{description} .= "\n";
+         }
+     }
+ }
+
+ # Closing-tag callback: decrements the nesting counter of the tag and,
+ # in list mode, clears the "inside search image" flag on every closing
+ # div.
+ sub end
+ {
+     my $self = shift;
+     my $tagname = shift;
+
+     $self->{inside}->{$tagname}--;
+     $self->{insideSearchImage} = 0
+         if ($tagname eq 'div') && $self->{parsingList};
+ }
+
+ # Text callback: stores the text according to the flags raised by the
+ # tag handler. List mode fills the current search result, tips mode
+ # selects the hints or codes section, item mode fills curInfo or, on a
+ # known label, raises the flag for the value that follows.
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     if ($self->{parsingList})
+     {
+         if ($self->{isName})
+         {
+             # Texts starting with 'Game:' are skipped: wait for the next one
+             unless ($origtext =~ /^Game:/)
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{name} = $origtext
+                     unless $self->{currentName};
+                 $self->{isName} = 0;
+             }
+         }
+         elsif ($self->{isPlatform})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{platform} = $origtext;
+             $self->{isPlatform} = 0;
+         }
+         elsif ($self->{isGameName})
+         {
+             $self->{currentName} = $origtext;
+             $self->{isGameName} = 0;
+         }
+         elsif ($self->{isDate})
+         {
+             # <em> tags enclose both dates and the 'a.k.a.' text, so the
+             # a.k.a. ones are ignored
+             unless ($origtext =~ /^a\.k\.a\./)
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{released} = $origtext;
+                 unless ($self->{itemsList}[$self->{itemIdx}]->{platform})
+                 {
+                     # No platform yet: use the text seen just before,
+                     # without its trailing blanks and '('
+                     $self->{previous} =~ s/[\s\(]*$//g;
+                     $self->{itemsList}[$self->{itemIdx}]->{platform} = $self->{previous};
+                 }
+             }
+             $self->{isDate} = 0;
+         }
+         $self->{previous} = $origtext;
+     }
+     elsif ($self->{parsingTips})
+     {
+         if ($self->{isSectionTips} eq 1)
+         {
+             # Section title: choose between hints and codes
+             if ($origtext =~ m/General Hints\/Tips/i)
+             {
+                 ($self->{isTip}, $self->{isCode}) = (2, 0);
+             }
+             elsif ($origtext =~ m/Cheats\/Codes/i)
+             {
+                 ($self->{isTip}, $self->{isCode}) = (0, 2);
+             }
+             $self->{isSectionTips} = 2;
+         }
+     }
+     else
+     {
+         # Labels announcing a value, with the flag each of them raises
+         my %flagOfLabel = (
+             'Developed by' => 'isDeveloper',
+             'Released' => 'isDate',
+             'Genre' => 'isGenre',
+             'Description' => 'isDescription',
+         );
+
+         my $field = $self->{is};
+         if ($field)
+         {
+             $origtext =~ s/^\s*//;
+
+             if ($field eq 'genre')
+             {
+                 push @{$self->{curInfo}->{genre}}, [ $origtext ];
+             }
+             else
+             {
+                 $self->{curInfo}->{$field} = $origtext;
+                 if ($field eq 'platform')
+                 {
+                     # DOS and Windows are both reported as PC
+                     $self->{curInfo}->{platform} =~ s/DOS/PC/;
+                     $self->{curInfo}->{platform} =~ s/Windows/PC/;
+                 }
+             }
+
+             $self->{is} = '';
+         }
+         elsif ($self->{isRating})
+         {
+             # Score divided by ten, rounded to the nearest integer
+             $self->{curInfo}->{ratingpress} = int($origtext/10+0.5);
+             $self->{isRating} = 0;
+         }
+         elsif ($self->{isDescription})
+         {
+             $self->{curInfo}->{description} .= $origtext;
+         }
+         elsif (exists $flagOfLabel{$origtext})
+         {
+             $self->{$flagOfLabel{$origtext}} = 1;
+         }
+         elsif ( ($origtext eq 'Also For') || ($origtext eq 'Platforms') )
+         {
+             $self->{curInfo}->{exclusive} = 0;
+         }
+     }
+ }
+
+ # URL of the hints page of the current item: the item URL (the curInfo
+ # field named by urlField) followed by '/hints', with the first
+ # '##MobyGames' occurrence removed.
+ sub getTipsUrl
+ {
+     my ($self) = @_;
+
+     my $hintsUrl = join('', $self->{curInfo}->{$self->{urlField}}, '/hints');
+     $hintsUrl =~ s/##MobyGames//;
+
+     return $hintsUrl;
+ }
+
+ # Constructor: builds the parent object, re-blesses it into the requested
+ # class and resets every parsing flag used by this plugin.
+ sub new
+ {
+     my $proto = shift;
+     my $class = ref($proto) || $proto;
+     my $self = bless($class->SUPER::new(), $class);
+
+     # Columns available in the search results
+     $self->{hasField} = {
+         name => 1,
+         platform => 1,
+         released => 1
+     };
+
+     # State flags toggled by the tag and text handlers
+     foreach my $flag (qw(isName isGame isGameName isPlatform isEditor
+                          isDeveloper isDate isGenre isDescription isBox
+                          isSectionTips isTip isCode))
+     {
+         $self->{$flag} = 0;
+     }
+     # Name of the curInfo field the next text belongs to
+     $self->{is} = '';
+
+     return $self;
+ }
+
+ # Returns the page untouched; as a side effect the loaded URL is copied
+ # into rootUrl, which start uses to build the cover-art URL.
+ sub preProcess
+ {
+     my $self = shift;
+     my $html = shift;
+
+     $self->{rootUrl} = $self->{loadedUrl};
+
+     return $html;
+ }
+
+ # Builds the quick-search URL for the given word, inserted as-is into
+ # the query string.
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     my $prefix = 'http://www.mobygames.com/search/quick?q=';
+     my $suffix = '&p=-1&search=Go&sFilter=1&sG=on';
+
+     return $prefix . $word . $suffix;
+ }
+
+ # Returns the given URL unchanged, or the site home page when the URL is
+ # empty or false.
+ sub getItemUrl
+ {
+     my ($self, $url) = @_;
+
+     return $url ? $url : 'http://www.mobygames.com/';
+ }
+
+ # Name of this plugin.
+ sub getName
+ {
+     my $pluginName = 'MobyGames';
+
+     return $pluginName;
+ }
+
+ # Author of this plugin.
+ sub getAuthor
+ {
+     my $author = 'TPF';
+
+     return $author;
+ }
+
+ # Language code reported by this plugin.
+ sub getLang
+ {
+     my $language = 'EN';
+
+     return $language;
+ }
+
+ # Character set reported by this plugin for the pages it reads.
+ sub getCharset
+ {
+     my ($self) = @_;
+
+     return 'ISO-8859-1';
+ }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCgames/GCNextGame.pm b/lib/gcstar/GCPlugins/GCgames/GCNextGame.pm new file mode 100644 index 0000000..b884b54 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCNextGame.pm @@ -0,0 +1,480 @@ +package GCPlugins::GCgames::GCNextGame;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCgames::GCgamesCommon;
+
+{
+ package GCPlugins::GCgames::GCPluginNextGame;
+
+ use base 'GCPlugins::GCgames::GCgamesPluginsBase';
+
+ # Start-tag handler (signature matches HTML::Parser's `start` callback).
+ #
+ # Parameters:
+ #   $tagname - name of the opened tag
+ #   $attr    - hash reference of the tag's attributes
+ #   $attrseq, $origtext - accepted but unused here
+ #
+ # In list mode it collects one entry per search result into
+ # $self->{itemsList}; in item mode it fills $self->{curInfo} and raises
+ # the is* state flags that text() consumes. Nothing is done in tips mode.
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+
+     if ($self->{parsingList})
+     {
+         if ($self->{isGenre} eq '1')
+         {
+             # Any tag following the genre label arms the genre capture
+             $self->{isGenre} = 2;
+         }
+         elsif (($tagname eq 'div') && ($attr->{class} eq 'box_searchresult'))
+         {
+             # A new search result starts here
+             $self->{isGame} = 1;
+             $self->{itemIdx}++;
+         }
+         elsif (($tagname eq 'ul') && ($attr->{class} eq 'platforms'))
+         {
+             $self->{isPlatform} = 1;
+         }
+         elsif (($tagname eq 'li') && ($self->{isPlatform} eq 1))
+         {
+             $self->{isPlatform} = 2;
+         }
+         elsif (($tagname eq 'hr') && ($attr->{class} eq 'clear'))
+         {
+             $self->{isPlatform} = 0;
+         }
+         elsif (($tagname eq 'a') && ($attr->{class} eq 'blu'))
+         {
+             # Each link is recorded only once per page
+             return if $self->{alreadyRetrieved}->{$attr->{href}};
+             $self->{alreadyRetrieved}->{$attr->{href}} = 1;
+             $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+             $self->{isName} = 1;
+         }
+         elsif (($tagname eq 'dt') && ($self->{isGame} eq '1'))
+         {
+             $self->{isAnalyse} = 1;
+         }
+     }
+     elsif ($self->{parsingTips})
+     {
+         # Nothing to do in tips mode
+     }
+     else
+     {
+         # A field label was just seen by text(): the next tag, whatever it
+         # is, arms the capture of the corresponding value.
+         if ($self->{isDeveloper} eq 1)
+         {
+             $self->{isDeveloper} = 2;
+         }
+         elsif ($self->{isGenre} eq 1)
+         {
+             $self->{isGenre} = 2;
+         }
+         elsif ($self->{isEditor} eq 1)
+         {
+             $self->{isEditor} = 2;
+         }
+         elsif ($self->{isDate} eq 1)
+         {
+             $self->{isDate} = 2;
+         }
+         elsif ($self->{isPlayer} eq 1)
+         {
+             $self->{isPlayer} = 2;
+         }
+         elsif ($self->{isPlatform} eq 1)
+         {
+             $self->{isPlatform} = 2;
+         }
+         elsif (($tagname eq 'div') && ($attr->{class} eq 'box_liquid'))
+         {
+             $self->{isDescription} = 0;
+         }
+         elsif ($tagname eq 'head')
+         {
+             $self->{isDescription} = 0;
+         }
+         elsif (($tagname eq 'h1') && ($attr->{class} eq 'blu'))
+         {
+             $self->{isName} = 1;
+         }
+         elsif (($tagname eq 'hr') && ($attr->{class} eq 'clear'))
+         {
+             $self->{isAnalyse} = 0;
+         }
+         elsif (($tagname eq 'dl') && ($attr->{class} eq 'datasheet_column'))
+         {
+             $self->{isAnalyse} = 1;
+         }
+         elsif (($tagname eq 'dt') && ($self->{isAnalyse} eq '1'))
+         {
+             $self->{isAnalyse} = 2;
+         }
+         elsif (($tagname eq 'td') && ($attr->{class} eq 'active current'))
+         {
+             $self->{isPlatform} = 1;
+         }
+         elsif (($tagname eq 'img') && ($attr->{id} eq 'datasheet_packshot'))
+         {
+             $self->{curInfo}->{boxpic} = $attr->{src};
+             if ($self->{bigPics})
+             {
+                 # Drop the thumbnail size marker from the picture URL
+                 $self->{curInfo}->{boxpic} =~ s|.T160.|.|gi;
+             }
+         }
+         elsif (($tagname eq 'img') && ($attr->{class} eq 'thumb'))
+         {
+             # Only the first two thumbnails are kept as screenshots
+             if ($self->{curInfo}->{screenshot1} eq '')
+             {
+                 $self->{curInfo}->{screenshot1} = $attr->{src};
+                 if ($self->{bigPics})
+                 {
+                     $self->{curInfo}->{screenshot1} =~ s|.T200.|.|gi;
+                 }
+             }
+             elsif ($self->{curInfo}->{screenshot2} eq '')
+             {
+                 $self->{curInfo}->{screenshot2} = $attr->{src};
+                 if ($self->{bigPics})
+                 {
+                     $self->{curInfo}->{screenshot2} =~ s|.T200.|.|gi;
+                 }
+             }
+         }
+         elsif (($tagname eq 'a') && ($attr->{name} eq 'REVIEW'))
+         {
+             $self->{isDescription} = 1;
+         }
+         elsif (($tagname eq 'ul') && ($attr->{class} eq 'platforms') && ($self->{isDescription} eq 1))
+         {
+             $self->{isDescription} = 2;
+         }
+         elsif (($tagname eq 'a') && ($attr->{class} eq 'blu') && ($self->{isDescription} eq 3))
+         {
+             # The description comes from the linked review page
+             my $review = _extractReviewText($self->loadPage($attr->{href}, 0, 1));
+             $self->{curInfo}->{description} .= $review if defined $review;
+         }
+     }
+ }
+
+ # Return the tag-stripped review body found in $html, or nothing when
+ # $html is undefined or does not contain the review container.
+ sub _extractReviewText
+ {
+     my ($html) = @_;
+
+     return if !defined $html;
+
+     # Decode ampersand entities (the previous substitution replaced '&'
+     # with itself and therefore did nothing)
+     $html =~ s|&amp;|&|gi;
+
+     my $startMarker = '<div class="testo edit_inline_box" id="id_text">';
+     my $startPos = index($html, $startMarker);
+     return if $startPos < 0;
+
+     # The leading "<>" makes the text before the first real tag survive
+     # the tag stripping below
+     $html = '<>' . substr($html, $startPos + length($startMarker));
+
+     my $endPos = index($html, '<div class="byline">');
+     $html = substr($html, 0, $endPos) if $endPos >= 0;
+
+     # Keep only what follows the closing '>' of each tag
+     my $text = '';
+     foreach my $chunk (split(/</, $html))
+     {
+         my $closePos = index($chunk, '>');
+         $text .= substr($chunk, $closePos + 1) if $closePos >= 0;
+     }
+     return $text;
+ }
+
+ # End-tag handler: decrements the nesting counter kept for this tag name
+ # in $self->{inside}.
+ sub end
+ {
+     my $self    = shift;
+     my $tagname = shift;
+
+     $self->{inside}{$tagname}--;
+ }
+
+ # Text handler: consumes the state flags raised by start().
+ #
+ # Parameters:
+ #   $origtext - the text chunk found between two tags
+ #
+ # In list mode it fills name, platform and genre of the current entry of
+ # $self->{itemsList}, then expands that entry into one entry per platform.
+ # In item mode it stores the field values into $self->{curInfo}.
+ # Nothing is done in tips mode.
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     if ($self->{parsingList})
+     {
+         if ($self->{isName})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{name} = $origtext;
+             $self->{isName} = 0;
+         }
+         elsif ($self->{isPlatform} eq 2)
+         {
+             # Strip leading and trailing whitespace
+             $origtext =~ s/^\s+//;
+             $origtext =~ s/\s+$//;
+
+             # Platforms are accumulated as a comma separated list
+             if (($self->{itemsList}[$self->{itemIdx}]->{platform} eq '') && ($origtext ne ''))
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{platform} = $origtext;
+             }
+             elsif ($origtext ne '')
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{platform} .= ', ';
+                 $self->{itemsList}[$self->{itemIdx}]->{platform} .= $origtext;
+             }
+
+             $self->{isPlatform} = 1;
+         }
+         elsif ($self->{isAnalyse})
+         {
+             $self->{isGenre} = 1 if ($origtext =~ m/Genere/i);
+
+             $self->{isAnalyse} = 0;
+         }
+         elsif ($self->{isGenre} eq 2)
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{genre} = $origtext;
+
+             my @platforms = split(/,/, $self->{itemsList}[$self->{itemIdx}]->{platform});
+
+             my $saveName  = $self->{itemsList}[$self->{itemIdx}]->{name};
+             my $saveUrl   = $self->{itemsList}[$self->{itemIdx}]->{url};
+             my $saveGenre = $self->{itemsList}[$self->{itemIdx}]->{genre};
+
+             # Step back so that the first expanded entry overwrites the
+             # entry that was just completed
+             $self->{itemIdx}--;
+
+             if ($saveName ne "")
+             {
+                 # One result per platform; the platform is appended to the URL
+                 foreach my $platform (@platforms)
+                 {
+                     # Strip leading and trailing whitespace
+                     $platform =~ s/^\s+//;
+                     $platform =~ s/\s+$//;
+
+                     $self->{itemIdx}++;
+                     $self->{itemsList}[$self->{itemIdx}]->{name}     = $saveName;
+                     $self->{itemsList}[$self->{itemIdx}]->{url}      = $saveUrl . $platform . '/';
+                     $self->{itemsList}[$self->{itemIdx}]->{platform} = $platform;
+                     $self->{itemsList}[$self->{itemIdx}]->{genre}    = $saveGenre;
+                 }
+             }
+             else
+             {
+                 # Entry without a name: discard it
+                 $self->{itemIdx}++;
+                 delete $self->{itemsList}[$self->{itemIdx}];
+                 $self->{itemIdx}--;
+             }
+
+             $self->{isGenre} = 0;
+         }
+     }
+     elsif ($self->{parsingTips})
+     {
+         # Nothing to do in tips mode
+     }
+     else
+     {
+         # Strip leading whitespace
+         $origtext =~ s/^\s+//;
+
+         if ($self->{isName})
+         {
+             $self->{curInfo}->{name} = $origtext;
+             $self->{isName} = 0;
+
+             if ($self->{ean} ne '')
+             {
+                 $self->{curInfo}->{ean} = $self->{ean};
+             }
+         }
+         elsif ($self->{isPlatform} eq 2)
+         {
+             $self->{curInfo}->{platform} = $origtext;
+             $self->{isPlatform} = 0;
+         }
+         elsif ($self->{isAnalyse} eq 2)
+         {
+             # Datasheet label: decide which field the next value belongs to
+             $self->{isEditor}    = 1 if ($origtext =~ m/Produttore/i);
+             $self->{isDeveloper} = 1 if ($origtext =~ m/Sviluppatore/i);
+             $self->{isGenre}     = 1 if ($origtext =~ m/Genere/i);
+             $self->{isDate}      = 1 if ($origtext =~ m/Disponibile/i);
+             $self->{isPlayer}    = 1 if ($origtext =~ m/Giocatori/i);
+
+             $self->{isAnalyse} = 1;
+         }
+         elsif ($self->{isDeveloper} eq 2)
+         {
+             $self->{curInfo}->{developer} = $origtext;
+             $self->{isDeveloper} = 0;
+         }
+         elsif ($self->{isGenre} eq 2)
+         {
+             $self->{curInfo}->{genre} = $origtext;
+             $self->{isGenre} = 0;
+         }
+         elsif ($self->{isEditor} eq 2)
+         {
+             $self->{curInfo}->{editor} = $origtext;
+             $self->{isEditor} = 0;
+         }
+         elsif ($self->{isDate} eq 2)
+         {
+             $self->{curInfo}->{released} = $origtext;
+             $self->{isDate} = 0;
+         }
+         elsif ($self->{isPlayer} eq 2)
+         {
+             $self->{curInfo}->{players} = $origtext;
+             $self->{isPlayer} = 0;
+         }
+         elsif ($self->{isDescription} eq 2)
+         {
+             # Literal, case-insensitive substring test. The platform name is
+             # scraped text and must not be interpolated as a regex: names
+             # such as "C++" would abort the match, and an empty pattern
+             # would silently reuse the last successful regex.
+             my $platform = $self->{curInfo}->{platform};
+             $platform = '' if !defined $platform;
+
+             if (index(lc($origtext), lc($platform)) >= 0)
+             {
+                 $self->{isDescription} = 3;
+             }
+             else
+             {
+                 $self->{isDescription} = 1;
+             }
+         }
+     }
+ }
+
+ # This plugin has no tips page: returns nothing (undef in scalar
+ # context, the empty list in list context).
+ sub getTipsUrl
+ {
+     my ($self) = @_;
+
+     return;
+ }
+
+ # Constructor: builds the parent object, declares which fields the
+ # results list provides and clears every parser state flag.
+ sub new
+ {
+     my $proto = shift;
+     my $class = ref($proto) || $proto;
+     my $self  = $class->SUPER::new();
+     bless($self, $class);
+
+     # Fields flagged 1 are available in the search results list
+     $self->{hasField} = {
+         name     => 1,
+         platform => 1,
+         genre    => 1,
+         released => 0
+     };
+
+     # State flags shared by start() and text()
+     foreach my $flag (qw(isName isGame isPlatform isAnalyse isGenre
+                          isEditor isDeveloper isDate isPlayer isDescription))
+     {
+         $self->{$flag} = 0;
+     }
+     $self->{ean} = '';
+
+     return $self;
+ }
+
+ # Rewrites the raw page before it is parsed and resets the table of
+ # already retrieved links. List pages are returned untouched; item pages
+ # get the ordered rewrites below. Returns the (possibly modified) HTML.
+ sub preProcess
+ {
+     my ($self, $html) = @_;
+
+     unless ($self->{parsingList})
+     {
+         # Order matters: '</li><li>' has to be handled before '<li>'
+         my @rewrites = (
+             [ qr|</li><li>|i, '<ul class="platforms">' ],
+             [ qr|<li>|i,      "\n* " ],
+             [ qr|<br>|i,      "\n" ],
+             [ qr|<br />|i,    "\n" ],
+             [ qr|<b>|i,       '' ],
+             [ qr|</b>|i,      '' ],
+             [ qr|<i>|i,       '' ],
+             [ qr|</i>|i,      '' ],
+             [ qr|<p>|i,       "\n" ],
+             [ qr|</p>|i,      '' ],
+             [ qr|\x{92}|i,    "'" ],
+             [ qr|’|i,         "'" ],
+             [ qr|•|i,         '*' ],
+             [ qr|œ|i,         'oe' ],
+             [ qr|…|i,         '...' ],
+             [ qr|\x{85}|i,    '...' ],
+             [ qr|\x{8C}|i,    'OE' ],
+             [ qr|\x{9C}|i,    'oe' ],
+         );
+
+         foreach my $rule (@rewrites)
+         {
+             my ($pattern, $replacement) = @{$rule};
+             $html =~ s/$pattern/$replacement/g;
+         }
+     }
+
+     $self->{alreadyRetrieved} = {};
+     return $html;
+ }
+
+ # Returns the search URL for $word. The word is remembered in
+ # $self->{ean} when the search field is 'ean', otherwise that slot is
+ # emptied.
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     $self->{ean} = ($self->{searchField} eq 'ean') ? $word : '';
+
+     return join('', 'http://next.videogame.it/magazine/review/?name=', $word);
+ }
+
+ # Returns $url unchanged when it is true, otherwise the site's base
+ # address.
+ sub getItemUrl
+ {
+     my ($self, $url) = @_;
+
+     return $url ? $url : 'http://next.videogame.it/';
+ }
+
+ # Name of this plugin.
+ sub getName
+ {
+     my $pluginName = 'NextGame';
+     return $pluginName;
+ }
+
+ # Character set name reported by this plugin.
+ sub getCharset
+ {
+     my ($self) = @_;
+
+     return "ISO-8859-1";
+ }
+
+ # Author of this plugin.
+ sub getAuthor
+ {
+     my $author = 'TPF';
+     return $author;
+ }
+
+ # Language code reported by this plugin.
+ sub getLang
+ {
+     my $lang = 'IT';
+     return $lang;
+ }
+
+ # Returns a reference to the list of search field names.
+ sub getSearchFieldsArray
+ {
+     my @fields = ('name');
+     return \@fields;
+ }
+
+ # Default picture file suffix reported by this plugin.
+ sub getDefaultPictureSuffix
+ {
+     my $suffix = '.jpg';
+     return $suffix;
+ }
+}
+
+1;
+
diff --git a/lib/gcstar/GCPlugins/GCgames/GCTheLegacy.pm b/lib/gcstar/GCPlugins/GCgames/GCTheLegacy.pm new file mode 100644 index 0000000..c0759a4 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCTheLegacy.pm @@ -0,0 +1,316 @@ +package GCPlugins::GCgames::GCTheLegacy; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCgames::GCgamesCommon; + +{ + package GCPlugins::GCgames::GCPluginTheLegacy; + + use base 'GCPlugins::GCgames::GCgamesPluginsBase'; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + if ($self->{parsingList}) + { + if (($tagname eq 'a') && ( $attr->{class} eq 'aa' )) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.thelegacy.de/Museum/" . 
$attr->{href}; + $self->{isName} = 1 ; + } + } + elsif ($self->{parsingTips}) + { + } + else + { + if (($tagname eq 'div') && ($attr->{style} eq 'font-size:14pt; color:#990000; padding-top:0.5em;')) + { + $self->{isName} = 1 ; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'description') && ($self->{curInfo}->{platform} eq '')) + { + $self->{isPlatform} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{target} eq 'ListGames') && ($attr->{class} eq 'a') && ($attr->{style} eq '')) + { + $self->{isGenre} = 1 ; + } + elsif (($tagname eq 'img') && ($attr->{src} =~ m|/pics/cover/Thumb|i)) + { + $self->{curInfo}->{boxpic} = "http://www.thelegacy.de" . $attr->{src}; + } + elsif (($tagname eq 'img') && ($attr->{src} =~ m|/pics/backcover/Thumb|i)) + { + $self->{curInfo}->{backpic} = "http://www.thelegacy.de" . $attr->{src}; + } + elsif (($tagname eq 'img') && ($attr->{src} =~ m|/pics/screen|i)) + { + if ($self->{curInfo}->{screenshot1} eq '') + { + $self->{curInfo}->{screenshot1} = "http://www.thelegacy.de" . $attr->{src}; + } + elsif ($self->{curInfo}->{screenshot2} eq '') + { + $self->{curInfo}->{screenshot2} = "http://www.thelegacy.de" . 
$attr->{src}; + } + } + elsif ( (($tagname eq 'span') ||($tagname eq 'div')) && ($attr->{class} eq 'category')) + { + $self->{isAnalyse} = 1 ; + } + elsif (($self->{isEditor} eq 1) && ($tagname eq 'a') && ($attr->{target} eq 'ListGames') && ($attr->{class} eq 'aa') && ($self->{curInfo}->{editor} eq '')) + { + $self->{isEditor} = 2 ; + } + elsif (($self->{isDeveloper} eq 1) && ($tagname eq 'a') && ($attr->{target} eq 'ListGames') && ($attr->{class} eq 'aa') && ($self->{curInfo}->{developer} eq '')) + { + $self->{isDeveloper} = 2 ; + } + elsif (($self->{isDate} eq 1) && ($tagname eq 'div') && ($attr->{class} eq 'description') && ($self->{curInfo}->{released} eq '')) + { + $self->{isDate} = 2 ; + } + elsif (($tagname eq 'a') && ($attr->{name} =~ m|review_|i)) + { + $self->{isDescription} = 1 ; + } + elsif (($self->{isDescription} eq 1) && ($tagname eq 'a') && ($attr->{class} eq 'aa')) + { + $self->{isDescription} = 2 ; + } + elsif (($tagname eq 'span') && ( $attr->{id} =~ m|review_|i) && ( $attr->{id} =~ m|_less|i)) + { + $self->{isDescription} = 0 ; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//; + if ($self->{isName}) + { + $self->{itemsList}[$self->{itemIdx}]->{name} = $origtext; + $self->{isName} = 0; + } + } + elsif ($self->{parsingTips}) + { + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//; + if ($self->{isName}) + { + $self->{curInfo}->{name} = $origtext; + $self->{isName} = 0 ; + } + elsif ($self->{isAnalyse}) + { + $self->{isDate} = 0; + $self->{isDeveloper} = 0; + $self->{isEditor} = 0; + + $self->{isDate} = 1 if ($origtext =~ m/ffentlichung/i); + $self->{isDate} = 1 if ($origtext =~ m/Publishing/i); + 
$self->{isDeveloper} = 1 if ($origtext =~ m/Entwickler/i); + $self->{isDeveloper} = 1 if ($origtext =~ m/developer/i); + $self->{isEditor} = 1 if ($origtext =~ m/Publisher/i); + + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isDate} eq 2) + { + $origtext =~ s/://; + $self->{curInfo}->{released} = $origtext; + $self->{isDate} = 0 ; + } + elsif ($self->{isGenre}) + { + $self->{curInfo}->{genre} .= $origtext; + $self->{curInfo}->{genre} .= ","; + $self->{isGenre} = 0; + } + elsif ($self->{isDeveloper} eq 2) + { + $self->{curInfo}->{developer} = $origtext; + $self->{isDeveloper} = 0 ; + } + elsif ($self->{isEditor} eq 2) + { + $self->{curInfo}->{editor} = $origtext; + $self->{isEditor} = 0 ; + } + elsif ($self->{isPlatform}) + { + $origtext =~ s/PC \(Windows\)/PC/; + my @array = split(/-/,$origtext); + $self->{curInfo}->{platform} = $array[0]; + # Enleve les blancs en fin de chaine + $self->{curInfo}->{platform} =~ s/\s+$//; + $self->{isPlatform} = 0; + } + elsif ($self->{isDescription} eq 1) + { + $self->{curInfo}->{description} .= $origtext; + } + elsif ($self->{isDescription} eq 2) + { + $self->{isDescription} = 1; + } + } + } + + sub getTipsUrl + { + my $self = shift; + + return ; + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + name => 1, + platform => 0, + genre => 0, + released => 0 + }; + + $self->{isInfo} = 0; + $self->{isName} = 0; + $self->{isPlatform} = 0 ; + $self->{isAnalyse} = 0; + $self->{isEditor} = 0; + $self->{isDeveloper} = 0; + $self->{isDate} = 0; + $self->{isGenre} = 0; + $self->{isDescription} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + } + else + { + $html =~ s|<u>||gi; + $html =~ s|<li>|\n* |gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<p>|\n|gi; + 
$html =~ s|</p>||gi; + $html =~ s|\x{92}|'|gi; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + $html =~ s|œ|oe|gi; + $html =~ s|…|...|gi; + $html =~ s|\x{85}|...|gi; + $html =~ s|\x{8C}|OE|gi; + $html =~ s|\x{9C}|oe|gi; + } + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.thelegacy.de/Museum/SQLlist_games.php3?logicalSearchConnection[]=AND&SearchValue=" . $word. "&searchEntity=TITLE&Review=&Forum=&type=&changed=&TopTen=&titel_id=&game_id=&titel=&first_letter=&misc=yes&quick=yes"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return 'TheLegacy'; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'DE'; + } + + sub getCharset + { + my $self = shift; + + return "ISO-8859-15"; + } + + sub getSearchFieldsArray + { + return ['name']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCgames/GCgamesAmazonCommon.pm b/lib/gcstar/GCPlugins/GCgames/GCgamesAmazonCommon.pm new file mode 100644 index 0000000..ce3b52c --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCgamesAmazonCommon.pm @@ -0,0 +1,314 @@ +package GCPlugins::GCgames::GCgamesAmazonCommon;
+
+###################################################
+#
+# Copyright 2005-2010 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCgames::GCgamesCommon;
+use GCPlugins::GCstar::GCAmazonCommon;
+
+{
+ package GCPlugins::GCgames::GCgamesAmazonPluginsBase;
+
+ use base ('GCPlugins::GCgames::GCgamesPluginsBase', 'GCPlugins::GCstar::GCPluginAmazonCommon');
+
+ # Start-tag handler (signature matches HTML::Parser's `start` callback).
+ #
+ # In list mode it records result URLs into $self->{itemsList} and raises
+ # the flags read by text(). In item mode it reads the keywords <meta> tag
+ # and reacts to the tpf* marker tags. Nothing is done in tips mode.
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+
+     if ($self->{parsingList})
+     {
+         if ($tagname eq 'div')
+         {
+             if (($attr->{class} eq 'buying') && ($self->{isGame} ne 2))
+             {
+                 # The loaded page itself is recorded as a result
+                 $self->{itemIdx}++;
+                 $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl};
+                 $self->{isGame} = 2;
+             }
+             elsif ($attr->{class} eq 'usedPrice')
+             {
+                 $self->{isGame} = 0;
+             }
+         }
+         elsif (($tagname eq 'h1') || ($tagname eq 'td'))
+         {
+             # Both markers open a result whose link is expected next
+             my $marker = ($tagname eq 'h1') ? 'headerblocktitle' : 'imageColumn';
+             if (($attr->{class} eq $marker) && ($self->{isGame} ne 2))
+             {
+                 $self->{isGame} = 1;
+                 $self->{isUrl}  = 1;
+             }
+         }
+         elsif ($tagname eq 'a')
+         {
+             if (($self->{isGame} eq 1) && ($self->{isUrl}))
+             {
+                 $self->{itemIdx}++;
+                 $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+                 $self->{isUrl} = 0;
+             }
+             elsif ($self->{isCodeEAN})
+             {
+                 # Link following the 'sdp-sai-asin' input: kept for later
+                 $self->{SaveUrl}   = $attr->{href};
+                 $self->{isCodeEAN} = 0;
+             }
+         }
+         elsif ($tagname eq 'span')
+         {
+             if (($attr->{class} eq 'srTitle') && ($self->{isGame} eq 1))
+             {
+                 $self->{isName} = 1;
+             }
+             elsif (($attr->{class} eq 'binding') && ($self->{isGame} eq 1))
+             {
+                 $self->{isPlatform} = 1;
+             }
+             elsif ($attr->{class} eq 'avail')
+             {
+                 $self->{isGame} = 0;
+             }
+         }
+         elsif (($tagname eq 'input') && ($attr->{name} eq 'sdp-sai-asin'))
+         {
+             $self->{isCodeEAN} = 1;
+         }
+         elsif (($tagname eq 'b') && ($attr->{class} eq 'sans'))
+         {
+             $self->{itemIdx}++;
+             $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{SaveUrl};
+         }
+     }
+     elsif ($self->{parsingTips})
+     {
+         # Nothing to do in tips mode
+     }
+     else
+     {
+         # Marker tags whose attributes carry a picture
+         my %imageFieldFor = (
+             tpfcouverture  => 'boxpic',
+             tpfscreenshot1 => 'screenshot1',
+             tpfscreenshot2 => 'screenshot2',
+         );
+
+         if ($tagname eq 'meta')
+         {
+             if ($attr->{name} eq 'keywords')
+             {
+                 # Keywords are "name, editor, genre, genre, ..."
+                 my ($name, $editor, @genres) = split(/,/, $attr->{content});
+                 $self->{curInfo}->{name}   = $name;
+                 $self->{curInfo}->{editor} = $editor;
+
+                 foreach my $genre (@genres)
+                 {
+                     $genre =~ s/^\s+//;
+                     next if $genre =~ m/console/i;
+                     next if $genre =~ m/cartouche/i;
+                     next if $genre =~ m/video games/i;
+                     next if $genre =~ /([0-9])/;
+                     $self->{curInfo}->{genre} .= $genre;
+                     $self->{curInfo}->{genre} .= ",";
+                 }
+
+                 # On amazon.com and amazon.co.jp no reliable criterion was
+                 # found for picking out the genres, so the field is cleared
+                 if (($self->{suffix} eq 'com') || ($self->{suffix} eq 'co.jp'))
+                 {
+                     $self->{curInfo}->{genre} = '';
+                 }
+
+                 if ($self->{ean} ne '')
+                 {
+                     $self->{curInfo}->{ean} = $self->{ean};
+                 }
+             }
+         }
+         elsif ($tagname eq 'tpfdateparution')
+         {
+             $self->{isDate} = 1;
+         }
+         elsif ($tagname eq 'tpfplateforme')
+         {
+             $self->{isPlatform} = 1;
+         }
+         elsif (exists $imageFieldFor{$tagname})
+         {
+             $self->{curInfo}->{$imageFieldFor{$tagname}} = $self->extractImage($attr);
+         }
+         elsif ($tagname eq 'tpfdescription')
+         {
+             $self->{isDesc} = 1;
+         }
+         elsif (($tagname eq 'div') && ($attr->{class} eq 'bucket') && ($self->{isDesc} eq 1))
+         {
+             $self->{isDesc} = 0;
+         }
+     }
+ }
+
+ # End-tag handler: decrements the nesting counter kept for this tag name
+ # in $self->{inside}.
+ sub end
+ {
+     my $self    = shift;
+     my $tagname = shift;
+
+     $self->{inside}{$tagname}--;
+ }
+
+ # Text handler: stores $origtext into the field selected by the flags
+ # that start() raised. List mode fills the current $self->{itemsList}
+ # entry; item mode fills $self->{curInfo}. Nothing is done in tips mode.
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     if ($self->{parsingList})
+     {
+         if ($self->{isPlatform})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{platform} = $self->transformPlatform($origtext);
+             $self->{isPlatform} = 0;
+         }
+         elsif ($self->{isName})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{name} = $origtext;
+             $self->{isName} = 0;
+         }
+     }
+     elsif (!$self->{parsingTips})
+     {
+         # Strip leading and trailing whitespace
+         $origtext =~ s/^\s+|\s+$//g;
+
+         if ($self->{isDate})
+         {
+             $self->{curInfo}->{released} = $origtext;
+             $self->{isDate} = 0;
+         }
+         elsif ($self->{isPlatform})
+         {
+             # Empty chunks are skipped: the flag stays raised until a
+             # non-empty text arrives
+             unless ($origtext eq '')
+             {
+                 $self->{curInfo}->{platform} = $self->transformPlatform($origtext);
+                 $self->{isPlatform} = 0;
+             }
+         }
+         elsif ($self->{isDesc} && ($origtext ne ""))
+         {
+             $self->{curInfo}->{description} .= $origtext . "\n";
+         }
+     }
+ }
+
+ # Normalises a platform label and returns it:
+ #  - keeps only the run of word characters and spaces that precedes the
+ #    first pair of non-word characters on a line,
+ #  - removes the first "SONY " (any case),
+ #  - maps anything mentioning "windows" to 'PC'.
+ sub transformPlatform
+ {
+     my ($self, $platform) = @_;
+
+     $platform =~ s{^([\w ]*)\W{2}.*$}{$1}ms;
+     $platform =~ s{SONY }{}i;
+
+     return 'PC' if $platform =~ m{windows}i;
+     return $platform;
+ }
+
+ # No tips page for this plugin: returns nothing (undef in scalar
+ # context, the empty list in list context).
+ sub getTipsUrl
+ {
+     my ($self) = @_;
+
+     return;
+ }
+
+ # Constructor: builds the parent object, declares which fields the
+ # results list provides and resets the parser state.
+ sub new
+ {
+     my $proto = shift;
+     my $class = ref($proto) || $proto;
+     my $self  = $class->SUPER::new();
+     bless($self, $class);
+
+     # Fields flagged 1 are available in the search results list
+     $self->{hasField} = {
+         name     => 1,
+         platform => 1
+     };
+
+     # State shared by start() and text()
+     foreach my $flag (qw(isCodeEAN isName isGame isUrl isPlatform isDate isDesc))
+     {
+         $self->{$flag} = 0;
+     }
+     $self->{SaveUrl} = '';
+     $self->{ean}     = '';
+
+     return $self;
+ }
+
+ # Returns the search URL for $word on the amazon site selected by
+ # $self->{suffix}. An 'ean' search remembers the code in $self->{ean}
+ # and uses a dedicated address; any other search empties that slot.
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     my $byEan = ($self->{searchField} eq 'ean');
+     $self->{ean} = $byEan ? $word : '';
+
+     return "http://s1.amazon." . $self->{suffix} . "/exec/varzea/sdp/sai-condition/" . $word
+         if $byEan;
+
+     return join('',
+         'http://www.amazon.', $self->{suffix},
+         '/gp/search/?redirect=true&search-alias=videogames&keywords=', $word);
+ }
+
+ # Returns $url unchanged when it is true, otherwise the base address of
+ # the amazon site selected by $self->{suffix}.
+ sub getItemUrl
+ {
+     my ($self, $url) = @_;
+
+     return $url ? $url : 'http://www.amazon.' . $self->{suffix};
+ }
+
+ # Name of this plugin.
+ sub getName
+ {
+     my $pluginName = 'Amazon';
+     return $pluginName;
+ }
+
+ # Author of this plugin.
+ sub getAuthor
+ {
+     my $author = 'TPF';
+     return $author;
+ }
+
+ # Language code reported by this plugin.
+ sub getLang
+ {
+     my $lang = 'FR';
+     return $lang;
+ }
+
+ # Character set name reported by this plugin.
+ sub getCharset
+ {
+     my ($self) = @_;
+
+     return "ISO-8859-1";
+ }
+
+ # Returns a reference to the list of search field names.
+ sub getSearchFieldsArray
+ {
+     my @fields = ('ean', 'name');
+     return \@fields;
+ }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCgames/GCgamesCommon.pm b/lib/gcstar/GCPlugins/GCgames/GCgamesCommon.pm new file mode 100644 index 0000000..9403652 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCgames/GCgamesCommon.pm @@ -0,0 +1,87 @@ +package GCPlugins::GCgames::GCgamesCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +our $MAX_ACTORS = 6; +our $MAX_DIRECTORS = 4; + +use GCPlugins::GCPluginsBase; + +{ + package GCPlugins::GCgames::GCgamesPluginsBase; + + use base qw(GCPluginParser); + use HTML::Entities; + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + return $self; + } + + sub getSearchFieldsArray + { + return ['name']; + } + + sub getTipsUrl + { + my $self = shift; + + return ''; + } + + sub getTips + { + my $self = shift; + my $url = $self->getTipsUrl; + if ($url) + { + $self->{parsingTips} = 1; + my $html = $self->loadPage($url, 0, 1); + $html = $self->preProcess($html); + decode_entities($html); + $self->{inside} = undef; + $self->parse($html); + $self->{parsingTips} = 0; + } + } + + sub getItemInfo + { + my $self = shift; + + 
$self->SUPER::getItemInfo; + $self->getTips; + + return $self->{curInfo}; + } + +} + +1;
\ No newline at end of file diff --git a/lib/gcstar/GCPlugins/GCmusics/GCDiscogs.pm b/lib/gcstar/GCPlugins/GCmusics/GCDiscogs.pm new file mode 100644 index 0000000..c6e0c87 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCmusics/GCDiscogs.pm @@ -0,0 +1,333 @@ +package GCPlugins::GCmusics::GCDiscogs; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCmusics::GCmusicsCommon; + +{ + package GCPlugins::GCmusics::GCPluginDiscogs; + + use base 'GCPlugins::GCmusics::GCmusicsPluginsBase'; + use XML::Simple; + + sub parse + { + my ($self, $page) = @_; + return if $page =~ /^<!DOCTYPE html/; + my $xml; + my $xs = XML::Simple->new; + my $key = $self->{searchField}; + if ($self->{parsingList}) + { + if ( $key eq 'artist' ) + { + $xml = $xs->XMLin($page); + my $artist = $xml -> {'artist'} -> {'name'}; + my $release; + foreach $release ( keys( %{ $xml -> {'artist'} -> {'releases'} -> {'release'} } ) ) + { + $self->{itemIdx}++; + my $title = $xml -> {'artist'} -> {'releases'} -> {'release'} -> {$release} -> {'title'}; + $self->{itemsList}[$self->{itemIdx}]->{url} = 
"http://api.discogs.com/release/".$release."?f=xml&api_key=e8f5ae8ba2"; + $self->{itemsList}[$self->{itemIdx}]->{title} = $title; + # Enleve les blancs en debut de chaine + $self->{itemsList}[$self->{itemIdx}]->{title} =~ s/^\s+//; + + $self->{itemsList}[$self->{itemIdx}]->{artist} = $artist; + # Enleve les blancs en fin de chaine + $self->{itemsList}[$self->{itemIdx}]->{artist} =~ s/\s+$//; + } + } + elsif ( $key eq 'label' ) + { + $xml = $xs->XMLin($page); + my $release; + foreach $release ( keys( %{ $xml -> {'label'} -> {'releases'} -> {'release'} } ) ) + { + $self->{itemIdx}++; + my $title = $xml -> {'label'} -> {'releases'} -> {'release'} -> {$release} -> {'title'}; + my $artist = $xml -> {'label'} -> {'releases'} -> {'release'} -> {$release} -> {'artist'}; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://api.discogs.com/release/".$release."?f=xml&api_key=e8f5ae8ba2"; + $self->{itemsList}[$self->{itemIdx}]->{title} = $title; + # Enleve les blancs en debut de chaine + $self->{itemsList}[$self->{itemIdx}]->{title} =~ s/^\s+//; + + $self->{itemsList}[$self->{itemIdx}]->{artist} = $artist; + # Enleve les blancs en fin de chaine + $self->{itemsList}[$self->{itemIdx}]->{artist} =~ s/\s+$//; + } + } + else + { + $xml = $xs->XMLin($page, + ForceArray => ['result', 'event'], + KeyAttr => {'release' => ''}); + my $release; + foreach $release ( @{ $xml->{'searchresults'}->{result} } ) + { + if ($release->{type} eq 'release') + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $release->{uri}; + $self->{itemsList}[$self->{itemIdx}]->{release} = $release->{summary}; + + my $found = index($release->{title},"-"); + if ( $found >= 0 ) + { + + $self->{itemsList}[$self->{itemIdx}]->{title} = substr($release->{title}, $found +length('-'),length($release->{title})- $found -length('-')); + # Enleve les blancs en debut de chaine + $self->{itemsList}[$self->{itemIdx}]->{title} =~ s/^\s+//; + + $self->{itemsList}[$self->{itemIdx}]->{artist} = 
substr($release->{title}, 0, $found); + # Enleve les blancs en fin de chaine + $self->{itemsList}[$self->{itemIdx}]->{artist} =~ s/\s+$//; + + # Clean up release summary + my $tmpTitle = $release->{title}; + $tmpTitle =~ s/\- //; + + # Unsure about this line, seems to not be required anymore, and is breaking parsing + # of search results. EG - searching for "raw animals" + # $self->{itemsList}[$self->{itemIdx}]->{release} =~ s/^$tmpTitle //; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $release->{title}; + } + } + } + } + } + else + { + $xml = $xs->XMLin($page, + ForceArray => ['track', 'artist', 'image', 'label', 'genre', 'format'], + KeyAttr => {'track' => ''}); + $self->{curInfo}->{title} = $xml->{release}->{title}; + $self->{curInfo}->{artist} = ''; + for my $art (@{$xml->{release}->{artists}->{artist}}) + { + $self->{curInfo}->{artist} .= $art->{name}.', '; + } + $self->{curInfo}->{artist} =~ s/, $//; + $self->{curInfo}->{producer} = ''; + $self->{curInfo}->{composer} = ''; + for my $rel (@{$xml->{release}->{extraartists}->{artist}}) + { + $self->{curInfo}->{producer} .= $rel->{name}.', ' + if $rel->{role} eq 'Producer'; + $self->{curInfo}->{composer} .= $rel->{name}.', ' + if (($rel->{role} eq 'Composed By') || ($rel->{role} eq 'Score') || ($rel->{role} eq 'Songwriter') || ($rel->{role} eq 'Written-By')); + } + $self->{curInfo}->{producer} =~ s/, $//; + $self->{curInfo}->{composer} =~ s/, $//; + $self->{curInfo}->{release} = $xml->{release}->{released}; + for my $track(@{$xml->{release}->{'tracklist'}->{track}}) + { + my $duree = $track->{duration}; + $duree =~ /([0-9]+):([0-9]+)/; + my $duree2 = int($1*60 + $2); + my $position = ""; + # Sometimes the position is missing, which causes it to be an array + if (!ref($track->{position})) + { + $position = $track->{position}; + } + $self->addTrack($track->{title}, $duree2, $position); + } + $self->{curInfo}->{tracks} = $self->getTracks; + $self->{curInfo}->{running} = $self->getTotalTime; + for 
my $cover(@{$xml->{release}->{images}->{image}}) + { + if ($self->{curInfo}->{cover} eq '') + { + if ($self->{bigPics}) + { + $self->{curInfo}->{cover} = $cover->{uri}; + } + else + { + $self->{curInfo}->{cover} = $cover->{uri}; + # Change to small res cover + $self->{curInfo}->{cover} =~ s/image\/R-/image\/R-150-/; + } + } + + } + $self->{curInfo}->{label} = ''; + for my $label (@{$xml->{release}->{labels}->{label}}) + { + $self->{curInfo}->{label} .= $label->{name}.', '; + } + $self->{curInfo}->{label} =~ s/, $//; + $self->{curInfo}->{genre} = ''; + for my $genre (@{$xml->{release}->{genres}->{genre}}) + { + $self->{curInfo}->{genre} .= $genre.','; + } + $self->{curInfo}->{genre} =~ s/,$//; + $self->{curInfo}->{origin} = $xml->{release}->{country}; + $self->{curInfo}->{origin} =~ s/,$//; + for my $format(@{$xml->{release}->{formats}->{format}}) + { + if ( $self->{curInfo}->{format} eq '') + { + $self->{curInfo}->{format} = $format->{name}; + $self->{curInfo}->{format} =~ s/,$//; + } + } + $self->{curInfo}->{web} = 'http://www.discogs.com/release/' . $xml->{release}->{id}; + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + artist => 1, + release => 1 + }; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + return $html; + } + + sub decodeEntitiesWanted + { + return 0; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + my $key = $self->{searchField}; + my $url; + if ( $key eq 'title' ) + { + $url = "http://api.discogs.com/search?type=all&q=". $word ."&f=xml&api_key=e8f5ae8ba2"; + } + elsif ( $key eq 'artist' ) + { + $url = "http://api.discogs.com/". $key ."/". $word ."?f=xml&api_key=e8f5ae8ba2"; + } + elsif ( $key eq 'label' ) + { + $url = "http://api.discogs.com/". $key ."/". $word ."?f=xml&api_key=e8f5ae8ba2"; + } + + return $url; +# return "http://api.discogs.com/search?type=all&q=". 
$word ."&f=xml&api_key=e8f5ae8ba2"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + if (!$url) + { + # If we're not passed a url, return a hint so that gcstar knows what type + # of addresses this plugin handles + $url = "http://www.discogs.com"; + } + elsif (index($url,"api_key") < 0) + { + # Url isn't for the discogs api, so we need to find the release id + # and return a url corresponding to the api page for this release + $url =~ /release\/([0-9]+)/; + my $id = $1; + $url = "http://api.discogs.com/release/". $id ."?f=xml&api_key=e8f5ae8ba2"; + } + return $url; + } + + sub changeUrl + { + my ($self, $url) = @_; + + return $self->getItemUrl($url); + } + + sub getName + { + return 'Discogs'; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'EN'; + } + + sub getCharset + { + my $self = shift; + + return "UTF-8"; + } + + sub getSearchCharset + { + my $self = shift; + + # Need urls to be double character encoded + return "utf8"; + } + + sub convertCharset + { + my ($self, $value) = @_; + return $value; + } + + sub getNotConverted + { + my $self = shift; + return []; + } + + sub getSearchFieldsArray + { + return ['title', 'artist', 'label']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCmusics/GCDoubanmusic.pm b/lib/gcstar/GCPlugins/GCmusics/GCDoubanmusic.pm new file mode 100644 index 0000000..b2d7873 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCmusics/GCDoubanmusic.pm @@ -0,0 +1,238 @@ +package GCPlugins::GCmusics::GCDoubanmusic; + +################################################### +# +# Copyright 2005-2010 Bai Wensimi +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCmusics::GCmusicsCommon;

{
    package GCPlugins::GCmusics::GCPluginDoubanmusic;

    use base qw(GCPlugins::GCmusics::GCmusicsPluginsBase);
    use XML::Simple;
    use Encode;
    use LWP::Simple qw($ua);

    # Parses one XML answer of the Douban API.
    #
    # When $self->{parsingList} is set, $page is a search answer: one item
    # (url, title, artist) is appended to $self->{itemsList} per entry and
    # $self->{itemIdx} is advanced accordingly.
    # Otherwise $page describes a single album and its details are stored
    # in $self->{curInfo}; tracks are registered through addTrack.
    sub parse
    {
        my ($self, $page) = @_;

        # Answers starting with "bad isbn" or "The" are skipped instead of
        # being handed to the XML parser (presumably plain-text error
        # messages - TODO confirm). The two tests must be OR-ed: they are
        # both anchored at the start of the page, so they can never be true
        # together.
        return if (($page =~ /^bad isbn/) || ($page =~ /^The/));

        my $xs = XML::Simple->new;

        if ($self->{parsingList})
        {
            # 'entry' is forced to an array so that a feed holding exactly
            # one result is handled like any other feed.
            my $xml = $xs->XMLin(
                $page,
                ForceArray => ['author', 'entry'],
                KeyAttr    => ['']
            );

            # A page ending with "feed>" holds a list of entries, anything
            # else is treated as one single entry.
            my @entries = ($page =~ /feed>$/)
                        ? @{ $xml->{'entry'} || [] }
                        : ($xml);

            foreach my $entry (@entries)
            {
                $self->{itemIdx}++;
                my $item = ($self->{itemsList}[ $self->{itemIdx} ] ||= {});
                $item->{'url'}   = $entry->{'id'};
                $item->{'title'} = $entry->{'title'};

                my @names = map { $_->{'name'} } @{ $entry->{'author'} || [] };
                $item->{'artist'} = join(',', @names) if @names;
            }
        }
        else
        {
            # 'db:attribute' and 'link' are forced to arrays: XML::Simple
            # only folds 'link' on its 'rel' attribute when it is a list,
            # and the loop below needs a list even for a single attribute.
            my $xml = $xs->XMLin(
                $page,
                ForceArray => ['author', 'db:attribute', 'link'],
                KeyAttr    => {'db:tag' => 'name', 'link' => 'rel'}
            );
            my $info  = ($self->{curInfo} ||= {});
            my $links = (ref($xml->{'link'}) eq 'HASH') ? $xml->{'link'} : {};

            foreach my $author (@{ $xml->{'author'} || [] })
            {
                $info->{artist} .= ',' if ($info->{artist} ne '');
                $info->{artist} .= $author->{'name'};
            }
            $info->{title} = $xml->{'title'};
            $info->{web}   = $links->{'alternate'}->{'href'};

            # Douban attribute name => field it is copied into
            my %fieldFor = (
                publisher => 'producer',
                pubdate   => 'release',
                ean       => 'unique',
                media     => 'format',
            );

            foreach my $attribute (@{ $xml->{'db:attribute'} || [] })
            {
                next unless (ref($attribute) eq 'HASH');
                my $attrName = $attribute->{'name'};
                next unless defined($attrName);

                if (exists $fieldFor{$attrName})
                {
                    $info->{ $fieldFor{$attrName} } = $attribute->{'content'};
                }
                elsif ($attrName eq 'tracks')
                {
                    # The track list is one text block of "<number>.<title>"
                    # chunks. Split in front of each number; the look-behind
                    # keeps numbers with several digits ("10.") in one piece.
                    foreach my $chunk (split(/(?<!\d)(?=\d+\.)/, $attribute->{'content'}))
                    {
                        # Ignore blank text found before the first number
                        next if ($chunk !~ /\S/);

                        my $num  = $chunk;
                        my $name = $chunk;
                        $num  =~ s/(^\d+).*/$1/;
                        $num  =~ s/\n//g;
                        $name =~ s/^\d+\.(.*)/$1/;
                        $name =~ s/\n//g;
                        $num  = encode("utf8", $num);
                        $name = encode("utf8", $name);
                        $self->addTrack($name, 0, $num);
                    }
                }
            }

            $info->{tracks} = $self->getTracks;

            # Ask for the large picture instead of the small one
            my $cover = $links->{'image'}->{'href'};
            $cover =~ s/spic/lpic/;
            $info->{cover} = $cover;
        }
    }

    # Constructor: builds the parent object and declares which fields are
    # available in the results list.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            title => 1,
            artist => 1,
            publication => 0,
        };
        return $self;
    }

    # The downloaded page is used as is.
    sub preProcess
    {
        my ($self, $html) = @_;

        return $html;
    }

    # Returns the API address used to search for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;
        return "http://api.douban.com/music/subjects?q=" .$word;
    }

    # Item addresses are returned unchanged.
    sub getItemUrl
    {
        my ($self, $url) = @_;
        return $url;
    }

    # Delegates to getItemUrl, which currently returns the address as is.
    sub changeUrl
    {
        my ($self, $url) = @_;
        return $self->getItemUrl($url);
    }

    sub getNumberPasses
    {
        return 1;
    }

    sub getName
    {
        return "豆瓣";
    }

    # Compares the "lid" parameter of $url with the site language code.
    # NOTE(review): siteLanguageCode is not defined in this package;
    # presumably it is inherited - confirm before relying on this method.
    sub testURL
    {
        my ($self, $url) = @_;
        # List assignment: $id is undef when there is no lid parameter,
        # instead of a capture left over from an earlier match.
        my ($id) = ($url =~ /[\?&]lid=([0-9]+)*/);
        return ($id == $self->siteLanguageCode());
    }

    # Restricts the fields shown in the results list to title and artist.
    sub getReturnedFields
    {
        my $self = shift;

        $self->{hasField} = {
            title => 1,
            artist => 1,
        };
    }

    sub getAuthor
    {
        return 'BW';
    }

    sub getLang
    {
        return 'ZH';
    }

    sub isPreferred
    {
        return 1;
    }

    sub getSearchCharset
    {
        my $self = shift;

        # Need urls to be double character encoded
        return "UTF-8";
    }

    sub getSearchFieldsArray
    {
        return ['isbn', 'title'];
    }

    sub getCharset
    {
        my $self = shift;

        return "UTF-8";
    }

    sub decodeEntitiesWanted
    {
        return 0;
    }

    sub siteLanguage
    {
        my $self = shift;

        return 'ZH';
    }

}

1;

# diff --git a/lib/gcstar/GCPlugins/GCmusics/GCMusicBrainz.pm b/lib/gcstar/GCPlugins/GCmusics/GCMusicBrainz.pm
# new file mode 100644
# index 0000000..f91027f
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCmusics/GCMusicBrainz.pm
# @@ -0,0 +1,309 @@
package GCPlugins::GCmusics::GCMusicBrainz;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCmusics::GCmusicsCommon;

{
    package GCPlugins::GCmusics::GCPluginMusicBrainz;

    use base 'GCPlugins::GCmusics::GCmusicsPluginsBase';
    use XML::Simple;
    use Locale::Country;

    # Parses one XML answer of the MusicBrainz web service.
    #
    # When $self->{parsingList} is set, $page is a release search answer:
    # one item (url, title, artist, earliest release date) is appended to
    # $self->{itemsList} per release. Otherwise $page describes a single
    # release and its details are stored in $self->{curInfo}.
    sub parse
    {
        my ($self, $page) = @_;
        my $xml;
        my $xs = XML::Simple->new;
        if ($self->{parsingList})
        {
            $xml = $xs->XMLin($page,
                              ForceArray => ['release', 'event'],
                              KeyAttr => {'release' => ''});
            foreach my $release ( @{ $xml->{'release-list'}->{release} } )
            {
                $self->{itemIdx}++;
                $self->{itemsList}[$self->{itemIdx}]->{url} = 'http://musicbrainz.org/album/'.$release->{id}.'.html';
                $self->{itemsList}[$self->{itemIdx}]->{title} = $release->{title};
                $self->{itemsList}[$self->{itemIdx}]->{artist} = $release->{artist}->{name};

                # '9999-12-31' is a sentinel meaning "no release event seen"
                my $releaseDate='9999-12-31';
                for my $releaseEvent (@{$release->{'release-event-list'}->{event}})
                {
                    if ($releaseEvent->{date} lt $releaseDate)
                    {
                        # Find the earliest release event
                        $releaseDate = $releaseEvent->{date};
                    }
                }

                $self->{itemsList}[$self->{itemIdx}]->{release} = $releaseDate
                    if $releaseDate ne '9999-12-31';
            }
        }
        else
        {
            $xml = $xs->XMLin($page,
                              ForceArray => ['track', 'event', 'relation', 'relation-list','tag'],
                              KeyAttr => {'track' => ''});
            $self->{curInfo}->{title} = $xml->{release}->{title};
            $self->{curInfo}->{web} = 'http://musicbrainz.org/release/'.$xml->{release}->{id}.'.html';
            $self->{curInfo}->{artist} = $xml->{release}->{artist}->{name};

            # XML::Simple gives a hash when the rating element carries
            # attributes and a plain string when it only holds text.
            my $rating = $xml->{release}->{rating};
            $rating = $rating->{content} if (ref($rating) eq 'HASH');
            # The rating is doubled for the press rating field. Double
            # first, then round: truncating before doubling dropped the
            # fractional part and could only ever produce even values.
            $self->{curInfo}->{ratingpress} = int(($rating || 0) * 2 + 0.5);

            $self->{curInfo}->{producer} = '';
            $self->{curInfo}->{composer} = '';

            # Step through the relations
            for my $relation (@{$xml->{release}->{'relation-list'}})
            {
                if ($relation->{'target-type'} eq 'Artist')
                {
                    # Artist type relations
                    for my $rel (@{$relation->{relation}})
                    {
                        # Search for producer or composer relations
                        $self->{curInfo}->{producer} .= $rel->{artist}->{name}.', '
                            if $rel->{type} eq 'Producer';
                        $self->{curInfo}->{composer} .= $rel->{artist}->{name}.', '
                            if $rel->{type} eq 'Composer';
                    }
                }
                elsif ($relation->{'target-type'} eq 'Url')
                {
                    # Look for url type relations. Currently only jamendo works, but we should
                    # also cover the archive.org relations
                    for my $rel (@{$relation->{relation}})
                    {
                        # Alternate cover art sites. The cover is only set
                        # when the album id can really be extracted from the
                        # address; otherwise it stays empty so that the
                        # amazon artwork below can still be used.
                        if (($rel->{target} =~ m/jamendo.com/)
                         && (!$self->{curInfo}->{cover})
                         && ($rel->{target} =~ /\/([0-9]+)$/))
                        {
                            # Cover art should be on jamendo
                            my $id = $1;
                            if ($self->{bigPics})
                            {
                                $self->{curInfo}->{cover} = "http://img.jamendo.com/albums/$id/covers/1.0.jpg";
                            }
                            else
                            {
                                $self->{curInfo}->{cover} = "http://img.jamendo.com/albums/$id/covers/1.200.jpg";
                            }
                        }
                    }
                }
            }

            # Remove the separator left after the last name
            $self->{curInfo}->{producer} =~ s/, $//;
            $self->{curInfo}->{composer} =~ s/, $//;

            my $releaseDate;
            my $releaseLabel;
            my $releaseCountry;
            my $releaseFormat;
            my $releaseDateFromCompare='9999-12-12';
            for my $releaseEvent (@{$xml->{release}->{'release-event-list'}->{event}})
            {
                my $releaseDateToCompare;
                # Check if musicbrainz only has the year, if so, set things up so we'll prefer
                # releases with the month & day over year-only releases
                if (length($releaseEvent->{date}) == 4)
                {
                    $releaseDateToCompare = $releaseEvent->{date}."-12-31";
                }
                else
                {
                    $releaseDateToCompare = $releaseEvent->{date};
                }

                if (($releaseDateToCompare lt $releaseDateFromCompare) ||
                    (($releaseDateToCompare eq $releaseDateFromCompare) &&
                     (($releaseEvent->{country} eq 'US') || ($releaseEvent->{country} eq 'GB'))))
                {
                    # Find the earliest release event, which has a month & day
                    # Big call, but we're probably more correct choosing a US or UK release if
                    # there's two release events with the same date, so prioritise them
                    $releaseDate = $releaseEvent->{date};
                    $releaseLabel = $releaseEvent->{label}->{name}
                        if $releaseEvent->{label};
                    $releaseCountry = code2country($releaseEvent->{country});
                    $releaseFormat = $releaseEvent->{format};
                    $releaseDateFromCompare = $releaseDateToCompare;
                }
            }

            $self->{curInfo}->{release} = $releaseDate;
            $self->{curInfo}->{label} = $releaseLabel;
            $self->{curInfo}->{origin} = $releaseCountry;
            $self->{curInfo}->{format} = $releaseFormat;

            for my $track (@{$xml->{release}->{'track-list'}->{track}})
            {
                # The duration is divided by 1000 before being stored
                # (presumably milliseconds to seconds - TODO confirm)
                $self->addTrack($track->{title}, $track->{duration} / 1000);
            }
            $self->{curInfo}->{tracks} = $self->getTracks;
            $self->{curInfo}->{running} = $self->getTotalTime;

            for my $genre (@{$xml->{release}->{'tag-list'}->{tag}})
            {
                # Capitalize first letter of each word
                $genre->{content} =~ s/\b(\w+)\b/ucfirst($1)/ge;
                # Only add genres if they have more then 1 vote, strips out a lot of
                # weird/wrong tags
                push @{$self->{curInfo}->{genre}}, [$genre->{content}]
                    if ($genre->{count} > 1);
            }

            # If amazon artwork exists, use it
            if (($xml->{release}->{asin}) && (!$self->{curInfo}->{cover}))
            {
                if ($self->{bigPics})
                {
                    $self->{curInfo}->{cover} = 'http://images.amazon.com/images/P/'.$xml->{release}->{asin}.'.01.LZZZZZZZ.jpg';
                }
                else
                {
                    $self->{curInfo}->{cover} = 'http://images.amazon.com/images/P/'.$xml->{release}->{asin}.'.01.MZZZZZZZ.jpg';
                }
            }
        }
    }

    # Turns a "YYYY", "YYYY-MM" or "YYYY-MM-DD" date into "YYYY",
    # "MM/YYYY" or "DD/MM/YYYY".
    # A value holding no four digit year is returned unchanged, instead of
    # being built from the captures of an unrelated earlier match.
    sub convertDate
    {
        my ($self, $date) = @_;

        return $date
            unless (defined($date)
                 && ($date =~ /([0-9]{4})-?([0-9]{2})?-?([0-9]{2})?/));
        my ($year, $month, $day) = ($1, $2, $3);

        my $converted = '';
        $converted .= $day.'/'   if $day;
        $converted .= $month.'/' if $month;
        return $converted.$year;
    }

    # Constructor: builds the parent object and declares which fields are
    # available in the results list.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            title => 1,
            artist => 1,
            release => 1,
            tracks => 1
        };

        return $self;
    }

    # The downloaded page is used as is.
    sub preProcess
    {
        my ($self, $html) = @_;

        return $html;
    }

    sub decodeEntitiesWanted
    {
        return 0;
    }

    # Returns the web service address searching for $word, either on the
    # artist or (any other search field) on the title.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        my $key = ($self->{searchField} eq 'artist') ? 'artist' : 'title';
        return "http://musicbrainz.org/ws/1/release/?type=xml&$key=$word";
    }

    # Returns $url when one is given, the site address otherwise.
    sub getItemUrl
    {
        my ($self, $url) = @_;
        return $url if $url;
        return "http://musicbrainz.org/";
    }

    # Rewrites an album/release web page address into the matching web
    # service request; any other address is returned unchanged.
    sub changeUrl
    {
        my ($self, $url) = @_;
        $url =~ s|http://musicbrainz.org/album/(.*?)\.html|http://musicbrainz.org/ws/1/release/$1?type=xml&inc=artist+tracks+release-events+artist-rels+url-rels+ratings+labels+tags|;
        $url =~ s|http://musicbrainz.org/release/(.*?)\.html|http://musicbrainz.org/ws/1/release/$1?type=xml&inc=artist+tracks+release-events+artist-rels+url-rels+ratings+labels+tags|;
        return $url;
    }

    sub getName
    {
        return 'MusicBrainz';
    }

    sub getAuthor
    {
        return 'Tian';
    }

    sub getLang
    {
        return 'EN';
    }

    sub getCharset
    {
        my $self = shift;

        return "UTF-8";
    }

    sub getSearchCharset
    {
        my $self = shift;

        # Need urls to be double character encoded
        return "utf8";
    }

    # Values are returned without any conversion.
    sub convertCharset
    {
        my ($self, $value) = @_;
        return $value;
    }

    sub getNotConverted
    {
        my $self = shift;
        return [];
    }

    sub getSearchFieldsArray
    {
        return ['title', 'artist'];
    }

    sub isPreferred
    {
        # Return status of 2 means plugin is default regardless of user's language
        return 2;
    }
}

1;

# diff --git a/lib/gcstar/GCPlugins/GCmusics/GCmusicsCommon.pm b/lib/gcstar/GCPlugins/GCmusics/GCmusicsCommon.pm
# new file mode 100644
# index 0000000..186662c
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCmusics/GCmusicsCommon.pm
# @@ -0,0 +1,62 @@
package GCPlugins::GCmusics::GCmusicsCommon;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCPluginsBase;
use GCExtract::GCExtractMusics;

{
    package GCPlugins::GCmusics::GCmusicsPluginsBase;

    use base ('GCPluginParser', 'GCExtract::GCmusicsExtracter');
    #use base ('GCPluginParser');
    use HTML::Entities;

    # Constructor. Accepts a class name or an existing object, lets the
    # parent class build the instance and reblesses it into the caller's
    # class.
    sub new
    {
        my ($invocant) = @_;
        my $class = ref($invocant) || $invocant;

        return bless($class->SUPER::new(), $class);
    }

    # Default list of search fields for music plugins: title only.
    sub getSearchFieldsArray
    {
        return ['title'];
    }

    # Clears the track list (resetTracks is not defined here; presumably it
    # comes from the extracter parent class), lets the parent class load
    # $url, then returns the information hash filled for the item.
    sub loadUrl
    {
        my $self = shift;
        my ($url) = @_;

        $self->resetTracks;
        $self->SUPER::loadUrl($url);

        return $self->{curInfo};
    }

}

1;
# \ No newline at end of file
# diff --git a/lib/gcstar/GCPlugins/GCstar/GCAmazonCommon.pm b/lib/gcstar/GCPlugins/GCstar/GCAmazonCommon.pm
# new file mode 100644
# index 0000000..a9ecd80
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCstar/GCAmazonCommon.pm
# @@ -0,0 +1,132 @@
package GCPlugins::GCstar::GCAmazonCommon;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCPluginsBase;

{
    package GCPlugins::GCstar::GCPluginAmazonCommon;

    # Text handler used while a results list is parsed.
    # Sets $self->{beginParsing} and returns 1 when $origtext, found inside
    # a <b>, <span> or <label> element, is the "sort by" caption of the
    # site selected through $self->{suffix}. Returns 0 in every other case.
    sub text
    {
        my ($self, $origtext) = @_;
        return 0 if ($self->{parsingEnded});
        return 0 unless ($self->{parsingList});
        return 0
            unless (($self->{inside}->{b})
                 || ($self->{inside}->{span})
                 || ($self->{inside}->{label}));

        # Caption to look for; the three site tests exclude each other
        my $suffix = $self->{suffix};
        my $caption = ($suffix =~ /^co/) ? qr/Sort by/
                    : ($suffix eq 'fr' ) ? qr/Trier par/
                    : ($suffix eq 'de' ) ? qr/Sortieren nach/
                    :                      undef;
        return 0 unless (defined($caption) && ($origtext =~ $caption));

        $self->{beginParsing} = 1;
        return 1;
    }

    # Returns the picture address to use for an <img> element, given its
    # attributes in $attr:
    #  - "P" pictures are rewritten to their "MZZZZZZZ" variant,
    #  - "I" and "G" pictures lose their "._xxx_." size marker,
    #  - the element with id 'prodImage' has its "_AAnnn_" marker removed,
    #  - anything else gives an empty string.
    sub extractImage
    {
        my ($self, $attr) = @_;
        my $url = $attr->{src};

        if ($url =~ m%^http://.*?images[.-]amazon\.com/images/(P)/([A-Z0-9]*)(\.[0-9]+\.)?[-A-Za-z0-9_.,]*?ZZZZZZZ(.*?)\.(jpg|gif)%)
        {
            # $3 (the ".nn." part) is optional and may be empty
            return 'http://images.amazon.com/images/'.$1.'/'.$2.$3.'MZZZZZZZ.'.$5;
        }
        if ($url =~ m%^http://.*?images[.-]amazon\.com/images/(I|G)/([-\%A-Z0-9a-z+]*)\._.*?_\.(jpg|gif)%)
        {
            return 'http://images.amazon.com/images/'.$1.'/'.$2.'.'.$3;
        }
        return '' unless ($attr->{id} eq 'prodImage');

        $url =~ s/_AA[0-9]*_//;
        return $url;
    }

    # Returns 1 when $value has one of the lengths accepted for an EAN
    # (8, 13, 15 or 18 characters), 0 otherwise. Only the length is
    # checked, not the characters themselves.
    sub isEAN
    {
        my ($self, $value) = @_;

        my $size = length($value);
        return ((grep { $size == $_ } (8, 13, 15, 18)) ? 1 : 0);
    }

    # Returns the part captured from a product page address (the "sr="
    # value or the text following "ref=sr_" / "ref=pd_"), or undef when
    # $url is not a product page address.
    # The "ref=" pattern used to be tested twice; the copy could never
    # match when the first attempt had failed, so it has been removed.
    sub isItemUrl
    {
        my ($self, $url) = @_;
        return $1
            if (($url =~ m|/dp/[A-Z0-9]*/sr=([0-3]-[0-9]*)/qid=[0-9]*|)
             || ($url =~ m|/dp/[A-Z0-9]*/ref=(?:sr\|pd)_([a-z0-9_]*)/[0-9]*|));
        return undef;
    }

    # Returns the search address for $word on the site selected through
    # $self->{suffix}, restricted to $self->{searchType}.
    sub getSearchUrl
    {
        my ($self, $word) = @_;
        return "http://www.amazon.".$self->{suffix}."/gp/search/?redirect=true&search-alias=".$self->{searchType}."&keywords=$word";
    }

    # Returns $url when one is given, the address of the site otherwise.
    sub getItemUrl
    {
        my ($self, $url) = @_;
        return $url if $url;
        return 'http://www.amazon.'.$self->{suffix};
    }

    sub getAuthor
    {
        return 'Tian';
    }

    # Simplifies the downloaded page before it is parsed.
    # Results list: title spans become <srTitle> elements and the "other
    # editions" cells are removed. Item page: links to /exec/obidos/ASIN/
    # are replaced with their text.
    sub preProcess
    {
        my ($self, $html) = @_;
        if ($self->{parsingList})
        {
            $html =~ s|<span\s+class="srTitle">([^<]*)</span>|<srTitle>$1</srTitle>|gim;
            $html =~ s|<td class="otherEditions">.*?</td>||gim;
        }
        else
        {
            $html =~ s|<a\s*href="/exec/obidos/ASIN/[0-9/\${}]*">([^<]*)</a>|$1|gim;
        }
        return $html;
    }
}

1;