diff options
author | Jörg Frings-Fürst <jff@merkur> | 2014-07-06 15:20:38 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <jff@merkur> | 2014-07-06 15:20:38 +0200 |
commit | 126bb8cb6b93240bb4d3a2b816b74c286c3d422b (patch) | |
tree | e66e1dfe77d53a52539489765c88d23e4423ae27 /lib/gcstar/GCPlugins/GCfilms |
Imported Upstream version 1.7.0upstream/1.7.0
Diffstat (limited to 'lib/gcstar/GCPlugins/GCfilms')
47 files changed, 14264 insertions, 0 deletions
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAlapage.pm b/lib/gcstar/GCPlugins/GCfilms/GCAlapage.pm new file mode 100644 index 0000000..604fdc4 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAlapage.pm @@ -0,0 +1,267 @@ +package GCPlugins::GCfilms::GCAlapage; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginAlapage; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + if ($attr->{class} eq "tx12noirbold") + { + my $url = $attr->{href}; + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + } + } + elsif ($tagname eq "div") + { + if ($attr->{class} eq "acteurs") + { + $self->{isActors} = 1; + } + elsif ($attr->{class} eq "realisateur") + { + $self->{isDirector} = 1; + } + } + } + else + { + if ($tagname eq "img") + { + if ($attr->{src} =~ /^\/resize\.php\?ref=([0-9]*)/) + { + 
$self->{curInfo}->{image} = + "http://imgdata.echo.fr/disque_l?v$1r.jpg"; + } + } + elsif ($tagname eq "span") + { + $self->{insideName} = 1 if $attr->{style} eq "color:#414B55;"; + $self->{insideActors} = 1 if $attr->{class} eq "tx11gris"; + } + elsif ($tagname eq "div") + { + $self->{insideSynopsis} = 1 if $attr->{align} eq "justify"; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + } + + sub text + { + my ($self, $origtext) = @_; + + return if length($origtext) < 2; + + if ($self->{parsingList}) + { + if ($self->{isMovie}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext; + $self->{isMovie} = 0; + $self->{isInfo} = 1; + return; + } + elsif ($self->{isActors}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{"actors"} .= + $self->{itemsList}[ $self->{itemIdx} ]->{"actors"} + ? ', ' . $self->capWord($origtext) + : $self->capWord($origtext); + $self->{isActors} = 0; + } + elsif ($self->{isDirector}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{"director"} = + $self->capWord($origtext); + $self->{isDirector} = 0; + } + + } + else + { + $origtext =~ s/\s{2,}//g; + + if ($self->{insideName}) + { + $self->{curInfo}->{title} = $self->capWord($origtext); + $self->{insideName} = 0; + } + elsif ($self->{insideActors}) + { + $origtext =~ s/avec : (.*) - (?:[^-]* )?DVD/$1/; + $origtext =~ s/ - /, /g; + $self->{curInfo}->{actors} = $self->capWord($origtext) + if !$self->{curInfo}->{actors}; + $self->{insideActors} = 0; + } + elsif ($self->{insideSynopsis}) + { + $origtext =~ s/\[br\]/\n/g; + $self->{curInfo}->{synopsis} .= $origtext; + $self->{insideSynopsis} = 0; + } + elsif ($origtext =~ m/R.*alisateur :/) + { + $origtext =~ s/R.*alisateur(?: :)?(.*)/$1/; + $origtext =~ s/ - /, /g; + $self->{curInfo}->{director} = $self->capWord($origtext) + if !$self->{curInfo}->{director}; + } + elsif ($origtext =~ m/Genre :/) + { + $origtext =~ s/Genre :(.*)/$1/; + $origtext = $self->capWord($origtext); + $origtext =~ s/ 
\/ /,/g; + $origtext =~ s/,Video//g; + $self->{curInfo}->{genre} = $origtext if !$self->{curInfo}->{genre}; + } + elsif ($origtext =~ m/Année du film :/) + { + $origtext =~ s/Année du film :(.*)/$1/; + $self->{curInfo}->{date} = $origtext if !$self->{curInfo}->{date}; + } + elsif ($origtext =~ m/Durée du film/) + { + $origtext =~ s/Durée du film(.*)/$1/; + $self->{curInfo}->{time} = $origtext if !$self->{curInfo}->{time}; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 0, + director => 1, + actors => 1, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + #Fix for character-encoding: + $html =~ s//'/g; + $html =~ s/
/\.\.\./g; + #' + +#<<< keep perltidy away from these lines + $html =~ s/<br>/\[br\]/gi; + $html =~ s/ / /g; + $html =~ s/<u>|<\/u>|<b>|<\/b>|<i>|<\/i>//gi; + $html =~ s/<SPAN class="(?:tx12gris6|tx12noir)">([^<]*)<\/SPAN>/$1/gi; + $html =~ s|<A href="/-/Liste/DVD/mot_real_nomprenom=.*?\?id=[0-9]*&donnee_appel=ALAPAGE" class="roll">([^<]*)</A>|<div class="realisateur">$1</div>|gi; + $html =~ s|<A href="/-/Liste/DVD/mot_art_nomprenom=.*?\?id=[0-9]*&donnee_appel=ALAPAGE" class="roll">([^<]*)</A>|<div class="acteurs">$1</div>|gi; + $html =~ s/<A href="http\:\/\/www\.alapage\.com\/-\/Liste\/DVD\/mot_(?:art_nomprenom|real_nomprenom|gen_libelle)=[^\/]*\/\?id=[0-9]*&donnee_appel=ALAPAGE[^"]*?" class="roll">([^<]*)<\/A>/$1/gi; + $html =~ s|<A .*?mot_gen_libelle=.*?>(.*?)</A>|$1|gi; + $html =~ s/<TD valign="top" class="tx12noir[^"]*">([^<]*)<\/TD>[^<]*<TD>([^<]*)<\/TD>/<td>$1 $2<\/td>/gi; + $html =~ s/<td class="tx12grisbold" align="center" bgcolor="\#E6E6E8">([^<]*)<\/td>[^<]*<TD width="2"><IMG src="\/turbo\/templates\/img\/pix\.gif" width="2" height="25" border="0" alt=""><\/TD>[^<]*<td class="tx10noir" align="center" bgcolor="\#F4F4F6" colspan="3">([0-9]* mn)<\/td>/<td>$1 $2<\/td>/gi; +#>>> + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://alapage.com/mx/?type=41&tp=L&fulltext=" . $word; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://alapage.com" . 
$url; + } + + sub getName + { + return "Alapage.com"; + } + + sub getAuthor + { + return 'MeV'; + } + + sub getLang + { + return 'FR'; + } + + sub getCharset + { + my $self = shift; + + return "ISO-8859-1"; + } + + sub getDefaultPictureSuffix + { + return '.jpg'; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAllmovie.pm b/lib/gcstar/GCPlugins/GCfilms/GCAllmovie.pm new file mode 100644 index 0000000..e8117c5 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAllmovie.pm @@ -0,0 +1,431 @@ +package GCPlugins::GCfilms::GCAllmovie; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginAllmovie; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + return; + } + + if ($self->{parsingList}) + { + if (($tagname eq "a") && ($self->{isFilm})) + { + my $url = $attr->{href}; + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + $self->{isFilm} = 0; + } + if ($tagname eq "td") + { + if ($attr->{style} =~ m/284px/) + { + $self->{isFilm} = 1; + } + elsif ($attr->{style} =~ m/70px/) + { + $self->{isYear} = 1; + } + elsif ($attr->{style} =~ m/190px/) + { + $self->{isDirector} = 1; + } + } + elsif ($tagname eq "tr") + { + $self->{isFound} = 1; + } + elsif ($tagname eq "title") + { + $self->{insideHTMLtitle} = 1; + } + } + else + { + if (($tagname eq "span") && ($attr->{class} eq "title")) + { + $self->{insideTitle} = 1; + } + elsif ( + ($tagname eq "div") + && ( ($attr->{id} eq "left-sidebar-title") + || ($attr->{id} eq "left-sidebar-title-small")) + ) + { + $self->{insideLeftSidebarTitle} = 1; + } + elsif ($tagname eq "a") + { + if ($attr->{href} =~ m/sql=B/) + { + $self->{insideActors} = 1; + } + elsif ($self->{insideDirectorList}) + { + $self->{insideDirector} = 1; + } + elsif ($self->{insideYearRuntime}) + { + $self->{insideYear} = 1; + $self->{insideYearRuntime} = 0; + } + elsif ($self->{insideCountriesRating}) + { + $self->{insideCountry} = 1; + $self->{insideCountriesRating} = 0; + } + elsif ($self->{nextIsSeries}) + { + $self->{insideSeries} = 1; + $self->{nextIsSeries} = 0; 
+ } + } + elsif ($tagname eq "img") + { + if ($attr->{src} =~ /http\:\/\/image\.allmusic\.com/) + { + $self->{curInfo}->{image} = ($attr->{src}); + } + elsif ($self->{insideRatingStars}) + { + $attr->{title} =~ /([\d\.]+) Stars/; + $self->{curInfo}->{ratingpress} = $1 * 2; + $self->{insideRatingStars} = 0; + } + } + elsif ($tagname eq "li") + { + if ($self->{insideGenreList}) + { + $self->{insideGenre} = 1; + } + } + elsif ($tagname eq "td") + { + if ( ($self->{insideAKA}) + && ($attr->{class} =~ m/formed-sub/)) + { + $self->{insideOtherTitles} = 1; + } + elsif ($self->{nextIsRating}) + { + $self->{insideRating} = 1; + $self->{nextIsRating} = 0; + } + elsif ($self->{nextIsRuntime}) + { + $self->{insideTime} = 1; + $self->{nextIsRuntime} = 0; + } + elsif ($attr->{colspan} == 2) + { + if ($attr->{class} eq "large-list-title") + { + } + else + { + $self->{insideSynopsis} = 1; + } + } + elsif ($attr->{class} eq "rating-stars") + { + $self->{insideRatingStars} = 1; + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + if ($tagname eq "td") + { + $self->{insideSynopsis} = 0; + } + if ($tagname eq "div") + { + $self->{insideLeftSidebarTitle} = 0; + } + if ($tagname eq "table") + { + $self->{insideGenreList} = 0; + $self->{insideAKA} = 0; + $self->{curInfo}->{original} =~ s/(, )$//; + } + } + + sub text + { + my ($self, $origtext) = @_; + return if ((length($origtext) == 0) || ($origtext eq " ")); + + $origtext =~ s/"/"/g; + $origtext =~ s/³/3/g; + $origtext =~ s/&#[0-9]*;//g; + $origtext =~ s/\n//g; + + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + if (($self->{insideHTMLtitle})) + { + if ($origtext !~ m/Results/) + { + $self->{parsingEnded} = 1; + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + $self->{insideHTMLtitle} = 0; + } + if ($self->{isMovie}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext; + $self->{isMovie} = 0; + $self->{isInfo} = 1; + 
return; + } + if ($self->{isYear}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $origtext + if $origtext =~ m/^[0-9]{4}?/; + $self->{isYear} = 0; + } + if ($self->{isDirector}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext; + $self->{isDirector} = 0; + } + if ($self->{isInfo}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1 + if $origtext =~ m|\(([0-9]*)(/I+)?\)|; + $self->{isInfo} = 0; + } + } + else + { + if ($self->{insideLeftSidebarTitle}) + { + if ($origtext eq "Genres") + { + $self->{insideGenreList} = 1; + $self->{insideLeftSidebarTitle} = 0; + } + elsif ($origtext eq "Director") + { + $self->{insideDirectorList} = 1; + $self->{insideLeftSidebarTitle} = 0; + } + elsif ($origtext eq "Year") + { + $self->{insideYearRuntime} = 1; + $self->{insideLeftSidebarTitle} = 0; + } + elsif ($origtext eq "Countries") + { + $self->{insideCountriesRating} = 1; + $self->{insideLeftSidebarTitle} = 0; + } + elsif ($origtext eq "AKA") + { + $self->{insideAKA} = 1; + $self->{insideLeftSidebarTitle} = 0; + } + } + elsif ($origtext =~ /Is part of the series:$/) + { + $self->{nextIsSeries} = 1; + } + if ($self->{insideActors}) + { + $self->{curInfo}->{actors} .= $origtext . ', ' + if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS); + $self->{actorsCounter}++; + $self->{insideActors} = 0; + } + + if ($self->{insideYear}) + { + $self->{curInfo}->{date} = $origtext; + $self->{insideYear} = 0; + $self->{nextIsRuntime} = 1; + } + if ($self->{insideTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{insideTitle} = 0; + } + elsif ($self->{insideGenre}) + { + $self->{curInfo}->{genre} .= $self->capWord($origtext) . 
','; + $self->{insideGenre} = 0; + } + elsif ($self->{insideDirector}) + { + $self->{curInfo}->{director} = $origtext; + $self->{insideDirector} = 0; + $self->{insideDirectorList} = 0; + } + elsif ($self->{insideSynopsis}) + { + $self->{curInfo}->{synopsis} .= $origtext; + } + elsif ($self->{insideCountry}) + { + $self->{curInfo}->{country} = $origtext; + $self->{insideCountry} = 0; + $self->{nextIsRating} = 1; + } + elsif ($self->{insideTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{curInfo}->{time} =~ s/.[0-9]*?://; + $self->{insideTime} = 0; + } + elsif ($self->{insideRating}) + { + $self->{curInfo}->{age} = 1 + if ($origtext eq 'Unrated') || ($origtext eq 'Open'); + $self->{curInfo}->{age} = 2 + if ($origtext eq 'G') || ($origtext eq 'Approved'); + $self->{curInfo}->{age} = 5 + if ($origtext eq 'PG') || ($origtext eq 'M') || ($origtext eq 'GP'); + $self->{curInfo}->{age} = 13 if $origtext eq 'PG13'; + $self->{curInfo}->{age} = 17 if $origtext eq 'R'; + $self->{curInfo}->{age} = 18 + if ($origtext eq 'NC17') || ($origtext eq 'X'); + $self->{insideRating} = 0; + } + elsif ($self->{insideOtherTitles}) + { + $self->{tempOriginal} = $origtext; + $self->{tempOriginal} =~ s/\s*$//; + $self->{tempOriginal} =~ s/^\s*//; + + $self->{curInfo}->{original} .= $self->{tempOriginal} . 
', '; + $self->{insideOtherTitles} = 0; + } + elsif ($self->{insideSeries}) + { + $self->{curInfo}->{serie} = $origtext; + $self->{curInfo}->{serie} =~ s/( \[.*\])//; + $self->{insideSeries} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 1, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + $html =~ s/""/'"/g; + $html =~ s/""/"'/g; + $html =~ s|</a></b><br>|</a><br>|; + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + my $wordFiltered = $word; + + # Allmovie doesn't return correct results if searching with a prefix like 'the' + $wordFiltered =~ s/^(the|a)?[+\s]+[^ a-zA-Z0-9]*\s*//i; +# return ('http://allmovie.com/search/all', ['q' => $wordFiltered,'submit' => 'SEARCH']); + return ('http://allmovie.com/search/all/' . $wordFiltered); + + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url =~ /^http:/; + return "http://allmovie.com" . $url; + } + + sub getName + { + return "Allmovie"; + } + + sub getAuthor + { + return 'Zombiepig'; + } + + sub getLang + { + return 'EN'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm b/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm new file mode 100644 index 0000000..db37774 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm @@ -0,0 +1,403 @@ +package GCPlugins::GCfilms::GCAllocine; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. 
+# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginAllocine; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($self->{insideResults} eq 1) + { + if ( ($tagname eq "a") + && ($attr->{href} =~ /^\/film\/fichefilm_gen_cfilm=/) + && ($self->{isMovie} eq 0)) + { + my $url = $attr->{href}; + $self->{isMovie} = 1; + $self->{isInfo} = 0; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + } + elsif (($tagname eq "td") && ($self->{isMovie} eq 1)) + { + $self->{isMovie} = 2; + } + elsif (($tagname eq "a") && ($self->{isMovie} eq 2)) + { + $self->{isMovie} = 3; + } + elsif (($tagname eq "br") && ($self->{isMovie} eq 3)) + { + $self->{itemsList}[ $self->{itemIdx} ]->{title} =~ s/^\s*//; + $self->{itemsList}[ $self->{itemIdx} ]->{title} =~ s/\s*$//; + $self->{itemsList}[ $self->{itemIdx} ]->{title} =~ s/\s+/ /g; + $self->{isMovie} = 4; + } + elsif (($tagname eq "span") + && ($attr->{class} eq "fs11") + && ($self->{isMovie} eq 4)) + { + $self->{isInfo} = 1; + $self->{isMovie} = 0; + } + elsif 
(($tagname eq "br") && ($self->{isInfo} eq 1)) + { + $self->{isInfo} = 2; + } + elsif (($tagname eq "br") && ($self->{isInfo} eq 2)) + { + $self->{isInfo} = 3; + } + } + } + else + { + if (($tagname eq "div") && ($attr->{class} eq "poster")) + { + $self->{insidePicture} = 1; + } + elsif (($tagname eq "img") && ($self->{insidePicture} eq 1)) + { + my $src = $attr->{src}; + if (!$self->{curInfo}->{image}) + { + if ($src =~ /r_160_240/) + { + $self->{curInfo}->{image} = $src; + } + else + { + $self->{curInfo}->{image} = "empty"; + } + } + } + elsif ($tagname eq "h1") + { + $self->{insideTitle} = 1; + } + elsif (($tagname eq "span") && ($self->{insideDate} eq 1)) + { + $self->{insideDate} = 2; + } + elsif (($tagname eq "span") && ($attr->{itemprop} eq "duration")) + { + $self->{insideTime} = 1; + } + elsif (($tagname eq "span") && ($self->{insideDirector} eq 1)) + { + $self->{insideDirector} = 2; + } + elsif (($tagname eq "a") && ($self->{insideActor} eq 1)) + { + $self->{insideActor} = 2; + } + elsif (($tagname eq "span") && ($self->{insideGenre} eq 1)) + { + $self->{insideGenre} = 2; + } + elsif (($tagname eq "span") && ($self->{insideCountry} eq 1)) + { + $self->{insideCountry} = 2; + } + elsif (($tagname eq "span") && ($attr->{class} eq "note") && ($self->{insidePressRating} eq 1)) + { + $self->{insidePressRating} = 2; + } + elsif (($tagname eq "div") && ($attr->{class} eq "breaker")) + { + $self->{insidePressRating} = 0; + } + elsif (($tagname eq "p") && ($attr->{itemprop} eq "description")) + { + $self->{insideSynopsis} = 1; + } + elsif (($tagname eq "td") && ($self->{insideOriginal} eq 1)) + { + $self->{insideOriginal} = 2; + } + + } + } + + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + + if ($tagname eq "li") + { + $self->{insideDirector} = 0; + $self->{insideActor} = 0; + $self->{insideGenre} = 0; + } + elsif ($tagname eq "div") + { + $self->{insideCountry} = 0; + $self->{insideSynopsis} = 0; + $self->{insideActor} = 0; + } + 
elsif ($tagname eq "th") + { + $self->{insideSynopsis} = 0; + } + elsif ($tagname eq "table") + { + $self->{insideResults} = 0; + } + + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if (($origtext =~ m/(\d+) r..?sultats? trouv..?s? dans les titres de films/) && ($1 > 0)) + { + $self->{insideResults} = 1; + } + if ($self->{isMovie} eq 3) + { + $self->{itemsList}[ $self->{itemIdx} ]->{title} .= $origtext; + } + if ($self->{isInfo} eq 1) + { + if ($origtext =~ /\s*([0-9]{4})/) + { + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1; + } + } + elsif ($self->{isInfo} eq 2) + { + if ($origtext =~ /^\s*de (.*)/) + { + $self->{itemsList}[ $self->{itemIdx} ]->{director} = $1; + } + } + elsif ($self->{isInfo} eq 3) + { + if ( ($origtext =~ m/^\s*avec (.*)/) + && (!$self->{itemsList}[ $self->{itemIdx} ]->{actors})) + { + $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $1; + } + $self->{isInfo} = 0; + } + } + else + { + my ($self, $origtext) = @_; + $origtext =~ s/[\r\n]//g; + $origtext =~ s/^\s*//; + $origtext =~ s/\s*$//; + + if ($self->{insideTitle} eq 1) + { + $self->{curInfo}->{title} = $origtext; + $self->{insideTitle} = 0; + } + elsif (($self->{insideDate} eq 2) && (length($origtext) > 1)) + { + $self->{curInfo}->{date} = $origtext + if !($origtext =~ /inconnu/); + $self->{insideDate} = 0; + } + elsif (($origtext =~ /^Date de sortie/) + && (!$self->{curInfo}->{date})) + { + $self->{insideDate} = 1; + } + elsif (($origtext =~ /^Date de reprise/) + && (!$self->{curInfo}->{date})) + { + $self->{insideDate} = 1; + } + elsif ($self->{insideTime} eq 1) + { + $origtext =~ /(\d+)h\s*(\d+)m/; + my $time = ($1*60) + $2; + $self->{curInfo}->{time} = $time." m."; + $self->{insideTime} = 0; + } + elsif ($self->{insideDirector} eq 2) + { + $origtext = ", " if $origtext =~ m/^,/; + $self->{curInfo}->{director} .= $origtext; + } + elsif ($origtext =~ /^R..?alis..? 
par/) + { + $self->{insideDirector} = 1; + } + elsif ($self->{insideActor} eq 2) + { + $origtext =~ s/plus//; + $origtext = "," if $origtext =~ m/^,/; + $self->{curInfo}->{actors} .= $origtext; + } + elsif ($origtext =~ /^Avec/) + { + $self->{insideActor} = 1; + } + elsif ($self->{insideGenre} eq 2) + { + $origtext = "," if $origtext =~ m/^,/; + $self->{curInfo}->{genre} .= $origtext; + } + elsif ($origtext =~ /^[\s\n]*Genre/) + { + $self->{insideGenre} = 1; + } + elsif ($self->{insideCountry} eq 2) + { + $origtext = "," if $origtext =~ m/^,/; + $self->{curInfo}->{country} .= $origtext; + } + elsif ($origtext =~ /Nationalité/) + { + $self->{insideCountry} = 1; + } + elsif ($origtext =~ /^Presse$/) + { + $self->{insidePressRating} = 1; + } + elsif ($self->{insidePressRating} eq 2) + { + $origtext =~ s/,/./; + $self->{curInfo}->{ratingpress} .= $origtext * 2; + } + elsif ($origtext =~ /^Interdit aux moins de (\d+) ans/) + { + $self->{curInfo}->{age} = $1; + } + elsif ($self->{insideSynopsis} eq 1) + { + $self->{curInfo}->{synopsis} .= $origtext; + } + elsif ($self->{insideOriginal} eq 2) + { + $self->{curInfo}->{original} = $origtext; + $self->{insideOriginal} = 0; + } + elsif ($origtext =~ /^Titre original/) + { + $self->{insideOriginal} = 1; + } + + + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + + $self->{hasField} = { + title => 1, + date => 1, + director => 1, + actors => 1, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{insideResults} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + $self->{actorsCounter} = 0; + + bless($self, $class); + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + # f=3 ? 
+ # return "http://www.allocine.fr/recherche/?q=$word&f=3&rub=1"; + return "http://www.allocine.fr/recherche/1/?q=$word"; + } + + sub getSearchCharset + { + my $self = shift; + + # Need urls to be double character encoded + return "utf8"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://www.allocine.fr" . $url; + } + + sub getName + { + return "Allocine.fr"; + } + + sub getAuthor + { + return 'Tian'; + } + + sub getLang + { + return 'FR'; + } + + sub getCharset + { + # return "UTF-8"; # For 1.5.0 Win32 + return "ISO-8859-1"; # For 1.5.0 Win32 with /lib/gcstar/GCPlugins/ ver.1.5.9svn + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm b/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm new file mode 100644 index 0000000..75c6854 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm @@ -0,0 +1,435 @@ +package GCPlugins::GCfilms::GCAlpacineES; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + package GCPlugins::GCfilms::GCPluginAlpacineES; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + + # text + # Called each time some plain text (between tags) is processed. + # $origtext is the read text. + sub text + { + my ($self, $origtext) = @_; + + return if length($origtext) < 2; + + # Código para procesar el resultado de la busqueda + if ($self->{parsingList}){ + # Guardamos la fecha. + if ($self->{inside}->{li} && $self->{insideInfos}){ + $origtext =~ /. \(([0-9]{4})\)/; + $self->{itemsList}[$self->{itemIdx}]->{date} = $origtext; + } + # Guardamos el título + if ($self->{inside}->{a} && $self->{insideInfos}){ + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + } + } + + else{ + # Eliminamos espacios iniciales, espacios dobles y espacios finales del texto + $origtext =~ s/^\s*|\s{2,}|\s*$//g; + # Estamos procesando el titulo + if ($self->{insideTitle}) + { + # Obtenemos titulo y fecha + $origtext =~ /(.*) \(([0-9]{4})\)/; + $self->{curInfo}->{title} = $1; + $self->{curInfo}->{date} = $2; + $self->{insideTitle} = 0; + return; + } + + # Si existe el hipervinculo "Ampliar" cambiamos la imagen por la ampliada + if ($self->{inside}->{a} && $origtext eq "Ampliar"){ + $self->{curInfo}->{image} =~ /(http:\/\/img.alpacine.com\/carteles\/.*)-[0-9]*(\.jpg)/; + $self->{curInfo}->{image} = $1 . 
$2; + return; + } + # Estamos en la puntuación real + if($self->{insideRating}){ + $self->{curInfo}->{ratingpress} = int( $origtext + 0.5 ); + $self->{insideRating} = 0; + } + # No hay puntuación real, asignamos 0 por defecto + if($self->{inside}->{div}){ + if($origtext =~ /Esperando \d votos/){ + $self->{curInfo}->{ratingpress} = 0; + } + } + # Procesamos el titulo original + if ($self->{isOrigTit} eq 1) { + $self->{isOrigTit} = 0; + $self->{curInfo}->{original} = $origtext; + return; + } + # Procesamos los generos (gen, gen, gen, gen...) + if ($self->{isGenres} eq 1) { + if($origtext ne ""){ + # hacemos uso de sus propias comas + $self->{curInfo}->{genre} .= $origtext; + } + else{ + $self->{isGenres} = 0; + } + return; + } + # Procesamos el país + if ($self->{isCountry} eq 1) { + $self->{isCountry} = 0; + $self->{curInfo}->{country} = $origtext; + return; + } + # Procesamos la duración + if ($self->{isTime} eq 1) { + $self->{isTime} = 0; + $self->{curInfo}->{time} = $origtext; + return; + } + # Procesamos los directores + if ($self->{isDirector} eq 1) { + if($origtext ne ""){ + if($self->{curInfo}->{director} eq ""){ + $self->{curInfo}->{director} .= $origtext; + } + else{ + $self->{curInfo}->{director} .= ", $origtext"; + } + } + else{ + $self->{isDirector} = 0; + } + return; + } + # Actores + if ($self->{isActors} eq 1) { + if($origtext ne ""){ + if($self->{curInfo}->{actors} eq ""){ + $self->{curInfo}->{actors} .= $origtext; + } + else{ + $self->{curInfo}->{actors} .= ", $origtext"; + } + } + else{ + $self->{isActors} = 0; + } + return; + } + # Procesamos la Sinopsis + if ($self->{isSynopsis} eq 1) { + $self->{isSynopsis} = 0; + $self->{curInfo}->{synopsis} = $origtext; + return; + } + # Procesamos los premios + if ($self->{isAwards} eq 1) { + $self->{isAwards} = 0; + $self->{curInfo}->{synopsis} = $self->{curInfo}->{synopsis}. 
"\n\nPremios:\n\t".$origtext; + $self->{insideInfos} = 0; + return; + } + + # Condiciones para procesar los campos en el siguiente ciclo + if($self->{insideInfos}){ + $self->{isOrigTit} = 1 if $origtext eq "Título original:"; + $self->{isGenres} = 1 if $origtext eq "Género:"; + $self->{isCountry} = 1 if $origtext eq "País:"; + $self->{isTime} = 1 if $origtext eq "Duración:"; + $self->{isDirector} = 1 if $origtext eq "Dirección:"; + $self->{isActors} = 1 if $origtext eq "Interpretación:"; + $self->{isSynopsis} = 1 if $origtext eq "Sinopsis:"; + $self->{isAwards} = 1 if $origtext eq "Premios:"; + } + } + } + + + # end + # Called each time a HTML tag ends. + # $tagname is the tag name. + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + + # Código para procesar el resultado de la busqueda + #if ($self->{parsingList}){ + #} + # Código para procesar la información de la pelicula seleccionada + #else { + #} + } + + # In processing functions below, self->{parsingList} can be used. + # If true, we are processing a search results page + # If false, we are processing a item information page. + + # $self->{inside}->{tagname} (with correct value for tagname) can be used to test + # if we are in the corresponding tag. + + # You have a counter $self->{itemIdx} that have to be used when processing search results. + # It is your responsability to increment it! + + # When processing search results, you have to fill the available fields for results + # + # $self->{itemsList}[$self->{movieIdx}]->{field_name} + # + # When processing a movie page, you need to fill the fields (if available) + # in $self->{curInfo}. + # + # $self->{curInfo}->{field_name} + + # start + # Called each time a new HTML tag begins. + # $tagname is the tag name. + # $attr is reference to an associative array of tag attributes. + # $attrseq is an array reference containing all the attributes name. 
+ # $origtext is the tag text as found in source file + # Returns nothing + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + + # Código para procesar el resultado de la busqueda para generar el listado + if ($self->{parsingList}) + { + # Comprobamos si estamos dentro de un título utilizando el atributo class + if( ($tagname eq "li" ) && ($attr->{class} ne "mas" )){ + $self->{itemIdx}++; + $self->{insideInfos} = 1 ; + return; + } + if( ($tagname eq "li" ) && ($attr->{class} eq "mas" )){ + $self->{insideInfos} = 0; + return; + } + # Si estamos en un título y encontramos una tag a, es un enlace a ficha + if ($tagname eq "a" && $self->{insideInfos}){ + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.alpacine.com".$attr->{href}; + return; + } + } + # Código para procesar la información de la pelicula seleccionada + else { + if ($tagname eq "h1"){ + $self->{insideTitle} = 1; + return; + } + # Si estamos dentro de una imagen y el src es el del thumb lo asignamos como imagen + if ($tagname eq "img") + { + # Extraemos la dirección de la imagen thumb + if ($attr->{src} =~ /http:\/\/img.alpacine.com\/carteles\/.*\.jpg/) + { + $self->{curInfo}->{image} = $attr->{src}; + } + return; + } + + if ($tagname eq "div" && $attr->{class} eq "voto"){ + $self->{insideRating} = 1; + return; + } + + if( $tagname eq "div" && $attr->{class} eq "datos" ){ + $self->{insideInfos} = 1 ; + return; + } + } + } + + # preProcess + # Called before each page is processed. You can use it to do some substitutions. + # $html is the page content. + # Returns modified version of page content. 
+ sub preProcess + { + my ($self, $html) = @_; + + # Anulamos el html si coincide con el patron de no resultados + if($html =~ /^.*No hay resultados para.*$/s){ + $html = ""; + return $html; + } + + # Recorta el código del listado de resultados, quedandose solo con la parte que nos interesa del html + # el modificador s/.../$1/s trata el flujo como una sola cadena y reemplaza todo el cuerpo con la parte que nos interesa + if($html =~ s/^.*<div class="titulo">Pel.culas <span class="resultados">\([0-9]* resultado[s]?\)<\/span><\/div><ul>(<li><a.*<\/a> \([0-9]*\)<\/li>).*$/$1/s){ + return $html; + } + + # Recorta el código de la ficha, quedandose solo con la parte que nos interesa del html + # Comprobamos si la pelicula contiene o no premios y nos quedamos con lo que corresponda + if($html =~ /^.*<div class="titulo">Premios:.*más\.\.\.<\/a><\/div><\/div>.*$/s){ + $html =~ s/^.*<div id="titulo">(.*<\/div><\/div>.*\n.*<div class="datox"><div class="titulo">Premios:.*)más\.\.\.<\/a><\/div><\/div>.*$/$1/s; + } + else{ + $html =~ s/^.*<div id="titulo">(.*<\/div><\/div>)\n\n\t\t\t\t\n\n\t\t\t\t<hr \/>.*$/$1/s; + } + return $html; + } + + # changeUrl + # Can be used to change URL if item URL and the one used to + # extract information are different. + # Return the modified URL. + #sub changeUrl + #{ + # my ($self, $url) = @_; + # return $url; + #} + + # getExtra + # Used if the plugin wants an extra column to be displayed in search results + # Return the column title or empty string to hide the column. + #sub getExtra + #{ + # return 'Extra'; + #} + + + # getLang + # Used to fill in plugin list with user language plugins + # Return the language used for this site (2 letters code). + sub getLang + { + return "ES"; + } + + + # getAuthor + # Used to display the plugin author in GUI. + # Returns the plugin author name. + sub getAuthor + { + return "DoVerMan"; + } + + + # getName + # Used to display plugin name in GUI. + # Returns the plugin name. 
+ sub getName + { + return 'Alpacine'; + } + + + # getCharset + # Used to convert charset in web pages. + # Returns the charset as specified in pages. + #sub getCharset + #{ + # my $self = shift; + # # Charset de la web + # return "UTF-8"; + #} + + + # getItemUrl + # Used to get the full URL of an item page. + # Useful when url on results pages are relative. + # $url is the URL as found with a search. + # Returns the absolute URL. + sub getItemUrl + { + my ($self, $url) = @_; + return $url; + } + + + # getSearchUrl + # Used to get the URL that to be used to perform searches. + # $word is the query + # Returns the full URL. + sub getSearchUrl + { + my ($self, $word) = @_; + # Hack para evitar problemas con acentos + $word =~ s/%E1/a/g; + $word =~ s/%E9/e/g; + $word =~ s/%ED/i/g; + $word =~ s/%F3/o/g; + $word =~ s/%FA/u/g; + $word =~ s/%C1/A/g; + $word =~ s/%C9/E/g; + $word =~ s/%CD/I/g; + $word =~ s/%D3/O/g; + $word =~ s/%DA/U/g; + + return "http://www.alpacine.com/buscar/?buscar=" . $word; + + } + + + # Constructor + sub new + { + # Inicialización + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + # Campos que devuelve el plugin (1 si, 0 no). 
Son los que apareceran + # en el listado de resultados + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + }; + + # Indica si estamos procesando información útil + $self->{insideInfos} = 0; + + # Indican el estado del procesado del listado de resultados + $self->{insideRating} = 0; + $self->{insideTitle} = 0; + + $self->{isOrigTit} = 0; + $self->{isGenres} = 0; + $self->{isCountry} = 0; + $self->{isTime} = 0; + $self->{isDirector} = 0; + $self->{isActors} = 0; + $self->{isSynopsis} = 0; + $self->{isAwards} = 0; + + return $self; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAmazon.pm b/lib/gcstar/GCPlugins/GCfilms/GCAmazon.pm new file mode 100644 index 0000000..8692a1b --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAmazon.pm @@ -0,0 +1,281 @@ +package GCPlugins::GCfilms::GCAmazon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsAmazonCommon; + +{ + package GCPlugins::GCfilms::GCPluginAmazon; + + use base qw(GCPlugins::GCfilms::GCfilmsAmazonPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + if ($self->{parsingEnded}) + { + if ($self->{itemIdx} < 0) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq 'input') + { + $self->{beginParsing} = 1 + if $attr->{src} =~ /go-button-search/; + } + return if ! $self->{beginParsing}; + if ($tagname eq 'srtitle') + { + $self->{isTitle} = 1; + } + elsif ($tagname eq 'publication') + { + $self->{isPublication} = 1; + } + elsif ($tagname eq 'actors') + { + $self->{isActors} = 1; + } + if ($tagname eq 'a') + { + my $urlId; + if ($urlId = $self->isItemUrl($attr->{href})) + { + $self->{isTitle} = 2 if $self->{isTitle} eq '1'; + return if $self->{alreadyRetrieved}->{$urlId}; + $self->{alreadyRetrieved}->{$urlId} = 1; + $self->{currentRetrieved} = $urlId; + my $url = $attr->{href}; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $url; + } + } + } + else + { + if (($tagname eq "img") && (!$self->{curInfo}->{image})) + { + $self->{curInfo}->{image} = $self->extractImage($attr); + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'content')) + { + $self->{insideContent} = 1; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'productDescriptionWrapper')) + { + $self->{insideSynopsis} = 1 + if (!$self->{curInfo}->{synopsis}); + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'emptyClear')) + { + $self->{insideSynopsis} = 0; + } + elsif (($tagname eq "span") 
&& ($self->{insideAge}) && ($attr->{class} =~ /medSprite/)) + { + $attr->{class} =~ s/\s*$//; + $self->{curInfo}->{age} = 2 if ($attr->{class} =~ m/G$/); + $self->{curInfo}->{age} = 5 if ($attr->{class} =~ m/PG$/); + $self->{curInfo}->{age} = 13 if ($attr->{class} =~ m/PG13$/); + $self->{curInfo}->{age} = 18 if ($attr->{class} =~ m/R$/); + $self->{insideAge} = 0; + } + elsif ($tagname eq "span") + { + $self->{insideNameAndDate} = 1 if $attr->{id} eq "btAsinTitle"; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + if ($tagname eq "li") + { + $self->{insideActors} = 0; + $self->{insideDirector} = 0; + } + } + + sub text + { + my ($self, $origtext) = @_; + return if GCPlugins::GCstar::GCPluginAmazonCommon::text(@_); + return if length($origtext) < 2; + return if ($self->{parsingEnded}); + if ($self->{parsingList}) + { + return if ! $self->{beginParsing}; + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + return; + } + elsif ($self->{isPublication}) + { + $origtext =~ m/([0-9]{4})/; + $self->{itemsList}[$self->{itemIdx}]->{date} = $1; + $self->{isPublication} = 0; + return; + } + elsif ($self->{isActors}) + { + $origtext =~ s/^\s*//; + $origtext =~ s/\s*$//; + $self->{itemsList}[$self->{itemIdx}]->{actors} = $origtext + if ! 
$self->{itemsList}[$self->{itemIdx}]->{actors}; + $self->{isActors} = 0; + return; + } + } + else + { + $origtext =~ s/\s{2,}/ /g; + + if ($self->{insideNameAndDate}) + { + if ($origtext =~ m/(.*) \(([0-9]{4})\)/) + { + $self->{curInfo}->{title} = $1; + $self->{curInfo}->{date} = $2; + } + $self->{insideNameAndDate} = 0; + } + elsif (($self->{insideActors}) && ($origtext !~ /^,/)) + { + $origtext =~ s/^\s//; + $origtext =~ s/\s+,/,/; + if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS) + { + push @{$self->{curInfo}->{actors}}, [$origtext]; + $self->{actorsCounter}++; + } + } + elsif (($self->{insideDirector}) && ($origtext !~ /^,/)) + { + $origtext =~ s/^\s//; + $origtext =~ s/,.$//; + $self->{curInfo}->{director} .= ", " + if $self->{curInfo}->{director}; + $self->{curInfo}->{director} .= $origtext; + } + elsif ($self->{insideTime}) + { + $origtext =~ s/^\s//; + $origtext =~ s/\n//g; + $self->{curInfo}->{time} = $origtext; + $self->{insideTime} = 0; + } + elsif ($self->{insideGenre}) + { + $origtext =~ s/\s*$//; + $self->{curInfo}->{genre} = $origtext; + $self->{insideGenre} = 0; + } + elsif (($self->{insideSynopsis}) && ($origtext ne '')) + { + $self->{curInfo}->{synopsis} .= $origtext; + } + elsif ($self->{inside}->{b}) + { + $self->{insideActors} = 1 if $origtext =~ /Actors:/; + $self->{insideDirector} = 1 if $origtext =~ /Directors:/; + $self->{insideTime} = 1 if $origtext =~ /Run Time:/; + $self->{insideGenre} = 1 if $origtext =~ /Genre:/; + $self->{insideAge} = 1 if $origtext =~ /Rating:/; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 1, + }; + + $self->{suffix} = 'com'; + + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + $html = $self->SUPER::preProcess($html); + if 
($self->{parsingList}) + { + $self->{isItem} = 0; + $html =~ s|~(.*?)<span class="bindingBlock">\(<span class="binding">(.*?)</span>( - .*?[0-9]{4})?\)</span>|<actors>$1</actors><format>$2</format><publication>$3</publication>|gsm; + } + else + { + $html =~ s/(<i>|<\/i>)//gim; + $html =~ s/<p>/\n/gim; + $html =~ s|</p>|\n|gim; + $html =~ s/(<ul>|<\/ul>)/\n/gim; + $html =~ s/<li>([^<])/- $1/gim; + $html =~ s|([^>])</li>|$1\n|gim; + $html =~ s|<br ?/?>|\n|gi; + $html =~ s|<a href="/gp/imdb/[^"]*">(.*?)</a>|$1|gm; + $html =~ s|<a href="[^"]*search-alias=dvd&field-keywords=[^"]*">(.*?)</a>|$1|gm; + } + $self->{parsingEnded} = 0; + $self->{alreadyRetrieved} = {}; + $self->{beginParsing} = 0; + + return $html; + } + + sub getName + { + return "Amazon (US)"; + } + + sub getLang + { + return 'EN'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAmazonDE.pm b/lib/gcstar/GCPlugins/GCfilms/GCAmazonDE.pm new file mode 100644 index 0000000..fab2b9f --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAmazonDE.pm @@ -0,0 +1,291 @@ +package GCPlugins::GCfilms::GCAmazonDE; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsAmazonCommon; + +{ + package GCPlugins::GCfilms::GCPluginAmazonDE; + + use base qw(GCPlugins::GCfilms::GCfilmsAmazonPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + if ($self->{itemIdx} < 0) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq 'input') + { + $self->{beginParsing} = 1 + if $attr->{src} =~ /go-button-search/; + } + return if ! $self->{beginParsing}; + return if ! $self->{beginParsing}; + if ($tagname eq 'srtitle') + { + $self->{isTitle} = 1; + } + elsif ($tagname eq 'publication') + { + $self->{isPublication} = 1; + } + elsif ($tagname eq 'actors') + { + $self->{isActors} = 1; + } + if ($tagname eq 'a') + { + my $urlId; + if ($urlId = $self->isItemUrl($attr->{href})) + { + $self->{isTitle} = 2 if $self->{isTitle} eq '1'; + return if $self->{alreadyRetrieved}->{$urlId}; + $self->{alreadyRetrieved}->{$urlId} = 1; + $self->{currentRetrieved} = $urlId; + my $url = $attr->{href}; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $url; + } + } + } + else + { + if (($tagname eq "img") && (!$self->{curInfo}->{image})) + { + $self->{curInfo}->{image} = $self->extractImage($attr); + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'content')) + { + $self->{insideContent} = 1; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'productDescriptionWrapper')) + { + $self->{insideSynopsis} = 1 + if (!$self->{curInfo}->{synopsis}); + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'emptyClear')) + { + 
$self->{insideSynopsis} = 0; + } + elsif ($tagname eq "span") + { + $self->{insideNameAndDate} = 1 if $attr->{id} eq "btAsinTitle"; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + if ($tagname eq "li") + { + $self->{insideActors} = 0; + $self->{insideDirector} = 0; + } + } + + sub text + { + my ($self, $origtext) = @_; + + return if GCPlugins::GCstar::GCPluginAmazonCommon::text(@_); + return if length($origtext) < 2; + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + return if ! $self->{beginParsing}; + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + return; + } + elsif ($self->{isPublication}) + { + $origtext =~ m/([0-9]{4})/; + $self->{itemsList}[$self->{itemIdx}]->{date} = $1; + $self->{isPublication} = 0; + return; + } + elsif ($self->{isActors}) + { + $origtext =~ s/^\s*//; + $origtext =~ s/\s*$//; + $self->{itemsList}[$self->{itemIdx}]->{actors} = $origtext + if ! $self->{itemsList}[$self->{itemIdx}]->{actors}; + $self->{isActors} = 0; + return; + } + } + else + { + $origtext =~ s/\s{2,}//g; + if ($self->{insideNameAndDate}) + { + $self->{curInfo}->{title} = $origtext; + $self->{insideNameAndDate} = 0; + }
+ elsif (($self->{insideActors}) && ($origtext !~ /^,/)) + { + $origtext =~ s/^\s//; + $origtext =~ s/\s+,/,/; + if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS) + { + push @{$self->{curInfo}->{actors}}, [$origtext]; + $self->{actorsCounter}++; + } + }
+ elsif (($self->{insideDirector}) && ($origtext !~ /^,/)) + { + $origtext =~ s/^\s//; + $origtext =~ s/,.$//; + $self->{curInfo}->{director} .= ", " + if $self->{curInfo}->{director}; + $self->{curInfo}->{director} .= $origtext; + }
+ elsif ($self->{insideTime})
+ {
+ $origtext =~ s/^\s//;
+ $origtext =~ s/\n//g;
+ $self->{curInfo}->{time} = $origtext;
+ $self->{insideTime} = 0;
+ }
+ elsif ($self->{insideDate})
+ {
+ $origtext =~ s/^\s//;
+ $origtext =~ s/\n//g; + $origtext =~ s/\-$//;
+ $self->{curInfo}->{date} = $origtext;
+ $self->{insideDate} = 0;
+ }
+ elsif (($self->{insideSynopsis}) && ($origtext ne '')) + { + $self->{curInfo}->{synopsis} .= $origtext; + }
+ elsif ($self->{insideAudio})
+ {
+ $origtext =~ s/^\s//;
+ $self->{curInfo}->{audio} = $origtext;
+ $self->{insideAudio} = 0;
+ }
+ elsif ($self->{insideSubTitle})
+ {
+ $origtext =~ s/^\s//;
+ $self->{curInfo}->{subt} = $origtext;
+ $self->{insideSubTitle} = 0;
+ } + elsif ($self->{inside}->{b}) + { + $self->{insideActors} = 1 if $origtext =~ /Darsteller:/; + $self->{insideDirector} = 1 if $origtext =~ /Regisseur\(e\):/; + $self->{insideDate} = 1 if $origtext =~ /Erscheinungstermin:/; + $self->{insideTime} = 1 if $origtext =~ /Spieldauer:/; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 1, + }; + + $self->{suffix} = 'de'; + + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + $html = $self->SUPER::preProcess($html); + if ($self->{parsingList}) + { + $self->{isItem} = 0; + $html =~ s|~(.*?)<span class="bindingBlock">\(<span class="binding">(.*?)</span>( - .*?[0-9]{4})?\)</span>|<actors>$1</actors><format>$2</format><publication>$3</publication>|gsm; + + } + else + { + $html =~ s/(<i>|<\/i>)//gim; + $html =~ s/<p>/\n/gim; + $html =~ s|</p>|\n|gim; + $html =~ s/(<ul>|<\/ul>)/\n/gim; + $html =~ s/<li>([^<])/- $1/gim; + $html =~ s|([^>])</li>|$1\n|gim; + $html =~ s|<br ?/?>|\n|gi; + $html =~ s|<a href="/gp/imdb/[^"]*">(.*?)</a>|$1|gm; + $html =~ s|<a href="[^"]*search-alias=dvd&field-keywords=[^"]*">(.*?)</a>|$1|gm; + #" + $html =~ s/<a href="\/exec\/obidos\/search-handle-url\/index=dvd-de&field-(?:actor|director|keywords)=[^\/]*\/[-0-9]*">([^<]*)<\/a>/$1/gm; + } + + $self->{parsingEnded} = 0; + $self->{alreadyRetrieved} = {}; + $self->{beginParsing} = 1; + + return $html; + } + + sub getName + { + return "Amazon (DE)"; + } + + sub getLang + { + return 'DE'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAmazonFR.pm b/lib/gcstar/GCPlugins/GCfilms/GCAmazonFR.pm new file mode 100644 index 0000000..cbb6674 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAmazonFR.pm @@ -0,0 +1,304 @@ +package GCPlugins::GCfilms::GCAmazonFR;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCfilms::GCfilmsAmazonCommon;
+
+{
+ package GCPlugins::GCfilms::GCPluginAmazonFR;
+
+ use base qw(GCPlugins::GCfilms::GCfilmsAmazonPluginsBase);
+
+ # start
+ # Opening-tag handler. Every call first increments the counter kept in
+ # $self->{inside} for $tagname. After that:
+ #  - once parsingEnded is set, it only makes sure one result exists
+ #    (pointing at $self->{loadedUrl}) and returns;
+ #  - on a results page (parsingList) it raises isTitle / isPublication /
+ #    isActors for the matching tags and records every new item link in
+ #    $self->{itemsList};
+ #  - on an item page it stores the cover image and raises the flags that
+ #    sub text reads for the text nodes that follow.
+ # $attrseq and $origtext are accepted but not used here.
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+
+     if ($self->{parsingEnded})
+     {
+         # Nothing was collected yet: expose the loaded page as the only item.
+         return unless $self->{itemIdx} < 0;
+         $self->{itemIdx} = 0;
+         $self->{itemsList}[0]->{url} = $self->{loadedUrl};
+         return;
+     }
+
+     if ($self->{parsingList})
+     {
+         if (($tagname eq 'input') && ($attr->{src} =~ /go-button-search/))
+         {
+             $self->{beginParsing} = 1;
+         }
+         return if ! $self->{beginParsing};
+
+         # Tags whose following text node carries a result field.
+         my %flagFor = (
+             srtitle     => 'isTitle',
+             publication => 'isPublication',
+             actors      => 'isActors',
+         );
+         $self->{$flagFor{$tagname}} = 1 if exists $flagFor{$tagname};
+
+         return if $tagname ne 'a';
+
+         my $urlId = $self->isItemUrl($attr->{href});
+         return if !$urlId;
+
+         $self->{isTitle} = 2 if $self->{isTitle} eq '1';
+         # Each item id is recorded once per page.
+         return if $self->{alreadyRetrieved}->{$urlId};
+         $self->{alreadyRetrieved}->{$urlId} = 1;
+         $self->{currentRetrieved} = $urlId;
+         $self->{itemIdx}++;
+         $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+         return;
+     }
+
+     # Item page.
+     if ($tagname eq 'img')
+     {
+         # Only the first image is kept.
+         $self->{curInfo}->{image} = $self->extractImage($attr)
+             if !$self->{curInfo}->{image};
+     }
+     elsif ($tagname eq 'div')
+     {
+         my $divClass = $attr->{class};
+         if ($divClass eq 'content')
+         {
+             $self->{insideContent} = 1;
+         }
+         elsif ($divClass eq 'productDescriptionWrapper')
+         {
+             $self->{insideSynopsis} = 1
+                 if (!$self->{curInfo}->{synopsis});
+         }
+         elsif ($divClass eq 'emptyClear')
+         {
+             $self->{insideSynopsis} = 0;
+         }
+     }
+     elsif ($tagname eq 'span')
+     {
+         if (($self->{insideAge}) && ($attr->{class} =~ /medSprite/))
+         {
+             # The class attribute itself is trimmed, as before.
+             $attr->{class} =~ s/\s*$//;
+             # Checked in this order; the last matching suffix wins.
+             foreach my $rating ([qr/G$/, 2], [qr/PG$/, 5], [qr/PG13$/, 13], [qr/R$/, 18])
+             {
+                 my ($suffix, $age) = @{$rating};
+                 $self->{curInfo}->{age} = $age if ($attr->{class} =~ $suffix);
+             }
+             $self->{insideAge} = 0;
+         }
+         elsif ($attr->{id} eq 'btAsinTitle')
+         {
+             $self->{insideNameAndDate} = 1;
+         }
+     }
+ }
+
+ # end
+ # Closing-tag handler: decrements the $self->{inside} counter for $tagname
+ # and, when a list item closes, clears the insideActors / insideDirector
+ # flags so that sub text stops adding names.
+ sub end
+ {
+     my ($self, $tagname) = @_;
+
+     $self->{inside}->{$tagname}--;
+     return if $tagname ne 'li';
+
+     foreach my $flag (qw(insideActors insideDirector))
+     {
+         $self->{$flag} = 0;
+     }
+ }
+
+ # text
+ # Text-node handler. Strings shorter than two characters are ignored.
+ # On a results page (parsingList) the text is stored as the title, year or
+ # actors of the current item, depending on the flag raised by sub start.
+ # On an item page the text either fills a field of $self->{curInfo}
+ # (according to the inside* flags) or, when it is one of the French field
+ # labels found inside a <b>, raises the flag for the next text node.
+ # $origtext is a copy, so the clean-ups below never touch the caller's data.
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     return if length($origtext) < 2;
+
+     if ($self->{parsingList})
+     {
+         return if ! $self->{beginParsing};
+         # Text inside <title> that does not start with "Amazon.fr" sets
+         # parsingEnded (presumably Amazon jumped straight to an item
+         # page - TODO confirm).
+         if (($self->{inside}->{title})
+             && ($origtext !~ /^Amazon.fr/))
+         {
+             $self->{parsingEnded} = 1;
+         }
+         if ($self->{isTitle})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+             $self->{isTitle} = 0;
+             return;
+         }
+         elsif ($self->{isPublication})
+         {
+             # Capture in list context: a failed match yields undef instead
+             # of whatever $1 still holds from an earlier successful match.
+             my ($year) = $origtext =~ m/([0-9]{4})/;
+             $self->{itemsList}[$self->{itemIdx}]->{date} = $year;
+             $self->{isPublication} = 0;
+             return;
+         }
+         elsif ($self->{isActors})
+         {
+             $origtext =~ s/^\s*//;
+             $origtext =~ s/\s*$//;
+             # The first actors string found for an item is kept.
+             $self->{itemsList}[$self->{itemIdx}]->{actors} = $origtext
+                 if ! $self->{itemsList}[$self->{itemIdx}]->{actors};
+             $self->{isActors} = 0;
+             return;
+         }
+     }
+     else
+     {
+         # Runs of two or more whitespace characters are deleted outright
+         # (not collapsed to a single space).
+         $origtext =~ s/\s{2,}//g;
+
+         if ($self->{insideNameAndDate})
+         {
+             $self->{curInfo}->{title} = $origtext;
+             $self->{insideNameAndDate} = 0;
+         }
+         elsif (($self->{insideActors}) && ($origtext !~ /^,/))
+         {
+             $origtext =~ s/^\s//;
+             $origtext =~ s/\s+,/,/;
+             # Each actor is stored as a one-element array ref, up to MAX_ACTORS.
+             if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS)
+             {
+                 push @{$self->{curInfo}->{actors}}, [$origtext];
+                 $self->{actorsCounter}++;
+             }
+         }
+         elsif (($self->{insideDirector}) && ($origtext !~ /^,/))
+         {
+             $origtext =~ s/^\s//;
+             $origtext =~ s/,.$//;
+             # Directors are accumulated as one comma-separated string.
+             $self->{curInfo}->{director} .= ", "
+                 if $self->{curInfo}->{director};
+             $self->{curInfo}->{director} .= $origtext;
+         }
+         elsif ($self->{insideTime})
+         {
+             $origtext =~ s/^\s//;
+             $origtext =~ s/\n//g;
+             $self->{curInfo}->{time} = $origtext;
+             $self->{insideTime} = 0;
+         }
+         elsif ($self->{insideDate})
+         {
+             $origtext =~ s/^\s//;
+             $origtext =~ s/\n//g;
+             $origtext =~ s/\-$//;
+             $self->{curInfo}->{date} = $origtext;
+             $self->{insideDate} = 0;
+         }
+         elsif (($self->{insideSynopsis}) && ($origtext ne ''))
+         {
+             $self->{curInfo}->{synopsis} .= $origtext;
+         }
+         elsif ($self->{insideAudio})
+         {
+             $origtext =~ s/^\s//;
+             $self->{curInfo}->{audio} = $origtext;
+             $self->{insideAudio} = 0;
+         }
+         elsif ($self->{insideSubTitle})
+         {
+             $origtext =~ s/^\s//;
+             $self->{curInfo}->{subt} = $origtext;
+             $self->{insideSubTitle} = 0;
+         }
+         elsif ($self->{inside}->{b})
+         {
+             # Field labels: the value is read from a later text node.
+             $self->{insideActors} = 1 if $origtext =~ /Acteurs\s*:/;
+             $self->{insideDirector} = 1 if $origtext =~ /R.alisateurs?\s*:/;
+             $self->{insideDate} = 1 if $origtext =~ /Date de sortie/;
+             $self->{insideTime} = 1 if $origtext =~ /Dur.e\s*:/;
+             $self->{insideAudio} = 1 if $origtext =~ /Langue\s*:/;
+             $self->{insideSubTitle} = 1 if $origtext =~ /Sous-titres\s*:/;
+         }
+     }
+ }
+
+ # new
+ # Constructor. The object is built by the parent class and blessed into
+ # the requested class. hasField declares which columns the results list
+ # provides (title, date and actors; no director). suffix is set to 'fr',
+ # presumably the Amazon domain suffix used by the base class - TODO confirm.
+ sub new
+ {
+     my ($proto) = @_;
+     my $class = ref($proto) || $proto;
+
+     my $self = $class->SUPER::new();
+     bless ($self, $class);
+
+     @{$self}{qw(hasField suffix)} = (
+         {
+             title => 1,
+             date => 1,
+             director => 0,
+             actors => 1,
+         },
+         'fr',
+     );
+
+     return $self;
+ }
+
+ # preProcess
+ # Rewrites the page before it is parsed and returns the new content.
+ # The parent class' preProcess runs first. On a results page the
+ # "~actors (format - year)" fragment is wrapped in synthetic <actors>,
+ # <format> and <publication> tags, which sub start recognises. On an item
+ # page <i> tags are dropped, <p>, <ul> and <br> become newlines, list items
+ # become "- " lines, and IMDb / search-handle links are reduced to their text.
+ # Finally the per-page parser state is reset.
+ sub preProcess
+ {
+     my ($self, $raw) = @_;
+
+     my $html = $self->SUPER::preProcess($raw);
+
+     if (!$self->{parsingList})
+     {
+         # The substitutions are order-dependent; $_ is aliased to $html.
+         for ($html)
+         {
+             s/(<i>|<\/i>)//gim;
+             s/<p>/\n/gim;
+             s|</p>|\n|gim;
+             s/(<ul>|<\/ul>)/\n/gim;
+             s/<li>([^<])/- $1/gim;
+             s|([^>])</li>|$1\n|gim;
+             s|<br ?/?>|\n|gi;
+             s|<a href="/gp/imdb/[^"]*">(.*?)</a>|$1|gm;
+             # Earlier form of the next pattern, which also required a
+             # trailing /<digits> path segment:
+ #           s/<a href="\/exec\/obidos\/search-handle-url\/index=dvd-fr&field-(?:actor|director|keywords)=[^\/]*\/[-0-9]*">([^<]*)<\/a>/$1/gm;
+             s/<a href="\/exec\/obidos\/search-handle-url\/index=dvd-fr&field-(?:actor|director|keywords)=[^\/]*">([^<]*)<\/a>/$1/gm;
+             #"
+         }
+     }
+     else
+     {
+         $self->{isItem} = 0;
+         $html =~ s|~(.*?)<span class="bindingBlock">\(<span class="binding">(.*?)</span>( - .*?[0-9]{4})?\)</span>|<actors>$1</actors><format>$2</format><publication>$3</publication>|gsm;
+     }
+
+     # Fresh state for the page about to be parsed; beginParsing starts at 1.
+     @{$self}{qw(parsingEnded alreadyRetrieved beginParsing)} = (0, {}, 1);
+
+     return $html;
+ }
+
+ # getName
+ # Returns the plugin name.
+ sub getName
+ {
+     my $pluginName = "Amazon (FR)";
+
+     return $pluginName;
+ }
+
+ # getLang
+ # Returns the two-letter code of the language used by this site.
+ sub getLang
+ {
+     my $langCode = 'FR';
+
+     return $langCode;
+ }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAmazonUK.pm b/lib/gcstar/GCPlugins/GCfilms/GCAmazonUK.pm new file mode 100644 index 0000000..096bb08 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAmazonUK.pm @@ -0,0 +1,264 @@ +package GCPlugins::GCfilms::GCAmazonUK; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# Edited 2009 by FiXx +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsAmazonCommon; + +{ + package GCPlugins::GCfilms::GCPluginAmazonUK; + + use base qw(GCPlugins::GCfilms::GCfilmsAmazonPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + if ($self->{itemIdx} < 0) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + return; + } + + if ($self->{parsingList}) + { + if (($self->{beginParsing} eq 0) && ($tagname eq 'div') && ($attr->{id} eq 'Results')) + { + $self->{beginParsing} = 1; + } + if (($self->{beginParsing}) && ($tagname eq 'table') && ($attr->{class} eq 'pagnTable')) + { + $self->{beginParsing} = 0; + $self->{parsingEnded} = 1; + } + return if ! 
$self->{beginParsing}; + if ($tagname eq 'a') + { + if (($self->{isItem}) && ($self->{isUrl})) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{isUrl} = 0 ; + } + } + elsif (($tagname eq 'td') && ($attr->{class} eq 'dataColumn')) + { + $self->{isItem} = 1 ; + $self->{isUrl} = 1 ; + $self->{isName} = 1 ; + } + } + else + { + if ($tagname eq "img") + { + if (!$self->{curInfo}->{image}) + { + $self->{curInfo}->{image} = $self->extractImage($attr); + } + } + elsif ($tagname eq "span") + { + $self->{insideNameAndDate} = 1 if $attr->{id} eq "btAsinTitle"; + } + elsif (($tagname eq "div") && ($attr->{class} eq "productDescriptionWrapper")) + { + $self->{isSynopsis} = 1; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + if (($tagname eq 'li') && ($self->{insideActors})) + { + $self->{insideActors} = 0; + } + + } + + sub text + { + my ($self, $origtext) = @_; + + return if GCPlugins::GCstar::GCPluginAmazonCommon::text(@_); + return if length($origtext) < 2; + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + return if ! $self->{beginParsing}; + if ($self->{isName}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isName} = 0; + $self->{isItem} = 0; + $self->{inActors} = 1; + } + elsif ($self->{inActors} && $self->{inside}->{td}) + { + $origtext =~ s/^\W*//; + $self->{itemsList}[$self->{itemIdx}]->{actors} = $origtext + if ! 
$self->{itemsList}[$self->{itemIdx}]->{actors}; + $self->{inActors} = 0; + return; + } + } + else + { + $origtext =~ s/\s{2,}//g; + + if ($self->{insideNameAndDate}) + { + (my $year = $origtext) =~ s/.*\[([0-9]{4})\].*/$1/ ; + (my $title = $origtext) =~ s/^([^\[]*).*$/$1/ ; + $self->{curInfo}->{title} = $title; + $self->{curInfo}->{origtitle} = $title; + $self->{curInfo}->{date} = $year; + $self->{insideNameAndDate} = 0; + } + elsif (($self->{insideActors}) && $self->{inside}->{a}) + { + $origtext =~ s/^\s//; + $origtext =~ s/,.$//; + $self->{curInfo}->{actors} .= $origtext.', '; + } + elsif ($self->{insideAge}) + { + $origtext =~ m/([0-9]{1,2})/; + $self->{curInfo}->{age} = $1; + $self->{insideAge} = 0; + } + elsif ($self->{insideDirector}) + { + $origtext =~ s/^\s//; + $origtext =~ s/,.$//; + $self->{curInfo}->{director} = $origtext; + $self->{insideDirector} = 0; + } + elsif ($self->{insideTime}) + { + $origtext =~ s/^\s//; + $origtext =~ s/\n//g; + $self->{curInfo}->{time} = $origtext; + $self->{insideTime} = 0; + } + elsif ($self->{isSynopsis}) + { + $self->{curInfo}->{synopsis} = $origtext if ! 
$self->{hasSynopsis}; + $self->{isSynopsis} = 0; + $self->{hasSynopsis} = 1; + } + elsif ($self->{inside}->{b}) + { + $self->{insideActors} = 1 if $origtext =~ /Actors:/; + $self->{insideDirector} = 1 if $origtext =~ /Directors:/; + $self->{insideAge} = 1 if $origtext =~ /Classification:/; + $self->{insideTime} = 1 if $origtext =~ /Run Time:/; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + date => 0, + director => 0, + actors => 1, + }; + + $self->{suffix} = 'co.uk'; + + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + $html = $self->SUPER::preProcess($html); + if ($self->{parsingList}) + { + $self->{isItem} = 0; + } + else + { + $html =~ s/(<i>|<\/i>)//gim; + $html =~ s/<p>/\n/gim; + $html =~ s|</p>|\n|gim; + $html =~ s/(<ul>|<\/ul>)/\n/gim; + $html =~ s/<li>([^<])/- $1/gim; + $html =~ s|([^>])</li>|$1\n|gim; + $html =~ s|<br ?/?>|\n|gi; + $html =~ s|<a href="/gp/imdb/[^"]*">(.*?)</a>|$1|gm; + $html =~ s|<a href="[^"]*search-alias=dvd&field-keywords=[^"]*">(.*?)</a>|$1|gm; + + $html =~ s/<a href="\/exec\/obidos\/search-handle-url\/index=dvd&field-(?:actor|director|keywords)=[^\/]*\/[-0-9]*">([^<]*)<\/a>/$1/gm; + } + + $self->{parsingEnded} = 0; + $self->{alreadyRetrieved} = {}; + $self->{beginParsing} = 0; + + return $html; + } + + sub getName + { + return "Amazon (UK)"; + } + + sub getLang + { + return 'EN'; + } + + sub getAuthor + { + return 'Tian & FiXx'; + } + + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAniDB.pm b/lib/gcstar/GCPlugins/GCfilms/GCAniDB.pm new file mode 100644 index 0000000..1c62e7c --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCAniDB.pm @@ -0,0 +1,279 @@ +package GCPlugins::GCfilms::GCAniDB; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of 
GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+    package GCPlugins::GCfilms::GCPluginAniDB;
+
+    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+    # start
+    # Opening-tag handler (presumably driven by the HTML parser of the
+    # base class -- confirm in GCfilmsPluginsBase).
+    # $tagname is the tag name and $attr a hash ref of its attributes.
+    # Counts the nesting depth per tag in $self->{inside}, then either
+    # records result URLs (list mode) or raises the flags that text()
+    # consumes (item mode).
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        # parsingEnded is raised by text() when the headline is not the
+        # search-results one. From then on the only thing harvested is the
+        # "animeatt" link, rewritten to the matching "anime" page and kept
+        # as the single result.
+        if ($self->{parsingEnded})
+        {
+            if ($tagname eq 'a')
+            {
+                if ($attr->{href} =~ m/animedb\.pl\?show=animeatt&aid=([0-9]*)/)
+                {
+                    $self->{itemIdx} = 0;
+                    $self->{itemsList}[0]->{url} = "animedb\.pl\?show=anime&aid=" . $1;
+                }
+            }
+            return;
+        }
+
+        if ($self->{parsingList})
+        {
+            if ($tagname eq 'a')
+            {
+                if ($attr->{href} =~ m/animedb\.pl\?show=anime&aid=[0-9]*/)
+                {
+                    $self->{isMovie} = 1;
+                    $self->{isInfo} = 1;
+                    # Open a new entry unless this link repeats the URL of
+                    # the entry currently being filled.
+                    $self->{itemIdx}++ if ($self->{itemIdx} < 0) || ($attr->{href} ne $self->{itemsList}[$self->{itemIdx}]->{url});
+                    $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+                }
+            }
+            elsif ($tagname eq 'td')
+            {
+                $self->{isYear} = 1 if ($attr->{class} eq 'date year');
+            }
+            elsif ($tagname eq 'h1')
+            {
+                $self->{insideHeadline} = 1;
+            }
+        }
+        else
+        {
+            if ($tagname eq 'img')
+            {
+                if ($attr->{src} =~ m/http\:\/\/img[0-9]\.anidb\.info\/pics\/anime\/[0-9]*\.jpg/)
+                {
+                    # Only the first matching picture is kept.
+                    $self->{curInfo}->{image} = $attr->{src} if !$self->{curInfo}->{image};
+                }
+            }
+            elsif ($tagname eq 'p')
+            {
+                if ($attr->{class} eq 'desc')
+                {
+                    $self->{insideSynopsis} = 1;
+                }
+            }
+            elsif ($tagname eq 'th')
+            {
+                $self->{isField} = 1 if $attr->{class} eq 'field';
+            }
+        }
+    }
+
+    # end
+    # Closing-tag handler: decrements the nesting counter of $tagname and
+    # stops the synopsis capture when a <p> is closed.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        $self->{inside}->{$tagname}--;
+        $self->{insideSynopsis} = 0 if $tagname eq 'p';
+    }
+
+    # text
+    # Text handler: $origtext is the text found between two tags. What is
+    # done with it depends on the flags raised by start() and by earlier
+    # calls of this method.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        return if ($self->{parsingEnded});
+
+        if ($self->{parsingList})
+        {
+            # A headline other than the search one stops the list parsing;
+            # start() then falls back to its parsingEnded branch.
+            if ($self->{insideHeadline})
+            {
+                $self->{parsingEnded} = 1 if $origtext !~ m/Anime List - Search for:/;
+                $self->{insideHeadline} = 0;
+            }
+
+            if ($self->{isMovie})
+            {
+                # Keep the first title seen for the current entry.
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext
+                    if ! $self->{itemsList}[$self->{itemIdx}]->{title};
+                $self->{isMovie} = 0;
+                $self->{isInfo} = 1;
+                return;
+            }
+            elsif ($self->{isYear})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{date} = $origtext;# if $origtext =~ m/^ [0-9]{4}(-[0-9]{4})? $/;
+                $self->{isYear} = 0;
+            }
+        }
+        else
+        {
+            if ($self->{insideSynopsis})
+            {
+                # Runs of whitespace are squeezed to one space before the
+                # text is appended to the synopsis.
+                $origtext =~ s/\s{2,}/ /g;
+                $self->{curInfo}->{synopsis} .= $origtext;
+                #$self->{curInfo}->{synopsis} =~ s|GCBRGC|<br>|g;
+                #$self->{curInfo}->{synopsis} =~ s/^\s*//;
+                $self->{insideSynopsis} = 0;
+            }
+#            elsif ($self->{inside}->{div})
+#            {
+#                $self->{curInfo}->{title} = $1 if $origtext =~ m/Title: (.*) /;
+#                if ($origtext =~ m/(?:Jap. Kanji|English): (.*) /)
+#                {
+#                    $self->{curInfo}->{original} = $1;
+#                }
+#                $self->{curInfo}->{date} = $1 if $origtext =~ m/Year: (.*)/;
+#                $self->{curInfo}->{director} = $1 if $origtext =~ m/Companies: (.*) /;
+#                if ($origtext =~ m/Genre: (.*)/)
+#                {
+#                    $self->{curInfo}->{genre} = $1;
+#                    $self->{curInfo}->{genre} =~ s/ - //;
+#                }
+#            }
+            elsif ($self->{isField})
+            {
+                # Text of a <th class="field">: it names the value that
+                # the next <td> text will carry.
+                $self->{isTitle} = 1 if $origtext eq 'Title';
+                $self->{isOrig} = 1 if $origtext =~ /kanji/i;
+                $self->{isYear} = 1 if $origtext eq 'Year';
+                $self->{isGenre} = 1 if $origtext eq 'Genre';
+                $self->{isField} = 0;
+            }
+            elsif ($self->{inside}->{td})
+            {
+                if ($self->{isTitle})
+                {
+                    $self->{curInfo}->{title} = $origtext;
+                    $self->{isTitle} = 0;
+                }
+                elsif ($self->{isOrig})
+                {
+                    $self->{curInfo}->{original} = $origtext;
+                    $self->{isOrig} = 0;
+                }
+                elsif ($self->{isYear})
+                {
+                    $self->{curInfo}->{date} = $origtext;
+                    $self->{isYear} = 0;
+                }
+                elsif ($self->{isGenre})
+                {
+                    # All whitespace and a trailing dash are removed from
+                    # the genre text.
+                    ($self->{curInfo}->{genre} = $origtext) =~ s/\s//g;
+                    $self->{curInfo}->{genre} =~ s/-$//;
+                    $self->{isGenre} = 0;
+                }
+            }
+        }
+    }
+
+    # new
+    # Constructor: builds the object through the parent class, fills the
+    # hasField table (title and date enabled, director and actors
+    # disabled) and resets the parsing state.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        # NOTE(review): presumably the fields available in the results
+        # list -- confirm against the base class.
+        $self->{hasField} = {
+            title => 1,
+            date => 1,
+            director => 0,
+            actors => 0,
+        };
+
+        $self->{isInfo} = 0;
+        $self->{isMovie} = 0;
+        $self->{curName} = undef;
+        $self->{curUrl} = undef;
+        $self->{isField} = 0;
+        return $self;
+    }
+
+    # preProcess
+    # Rewrites the raw page ($html) and returns the result: several kinds
+    # of links are replaced by their text, <br /> becomes a newline and
+    # <i> markup is dropped. Also clears the parsingEnded flag.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        $self->{parsingEnded} = 0;
+
+        $html =~ s/<a href="animedb\.pl\?show=producer&prid=[0-9]*" title="[^"]*">([^<]*)<\/a>/$1/g;
+        $html =~ s/<a href="animedb\.pl\?show=genre" target="_blank">(Genre:)<\/a>/$1/g;
+        $html =~ s/<a href="animedb\.pl\?show=animelist&genid=[^"]*" title="[^"]*">([^<]*)<\/a>/$1/g;
+        $html =~ s/ - <a href="animedb\.pl\?show=search&do\.search=1(&search\.anime.genre.[0-9]*=on){1,}" title="search for other animes with all of these genres">\[similar\]<\/a> //;
+        #$html =~ s/<td> ([^:]*): <\/td>\s*<td> ([^<]*) ?<\/td>/<div>$1: $2<\/div>/g;
+        $html =~ s/<br \/>/\n/g;
+        $html =~ s/<b>Awards:<\/b><br><a href="[^"]*" target="_blank"><img src="[^"]*" border=0 alt="[^"]*" title="[^"]*"><\/a> <hr>//g;
+
+        #Removed italic strings (useful for synopsis source)
+        $html =~ s|<i>(.*?)</i>|$1|g;
+        #Extract synopsis
+        #$html =~ s|<td>([^<]*?)</td>\s*?</tr>\s*?</table>\s*?<hr>|<div class="synopsis">$1</div>|ms;
+
+        #Remove Headline tag
+        $html =~ s/>\W*?<!-- headline -->/>/;
+
+        return $html;
+    }
+
+    # getSearchUrl
+    # Returns the search URL for $word; $word is inserted as received.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        return "http://anidb.info/perl-bin/animedb.pl?show=animelist&adb.search=$word";
+    }
+
+    # getItemUrl
+    # Turns the relative $url stored by start() into an absolute one.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return 'http://anidb.info/perl-bin/' . $url;
+    }
+
+    # getName
+    # Name of the plugin.
+    sub getName
+    {
+        return 'AniDB';
+    }
+
+    # getAuthor
+    # Author of the plugin.
+    sub getAuthor
+    {
+        return 'MeV';
+    }
+
+    # getLang
+    # Language code of the site.
+    sub getLang
+    {
+        return 'EN';
+    }
+
+    # getCharset
+    # Character set reported for the pages of the site.
+    sub getCharset
+    {
+        my $self = shift;
+
+        return "UTF-8";
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAnimator.pm b/lib/gcstar/GCPlugins/GCfilms/GCAnimator.pm
new file mode 100644
index 0000000..45704d0
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCAnimator.pm
@@ -0,0 +1,236 @@
+package GCPlugins::GCfilms::GCAnimator;
+
+###################################################
+#
+# Copyright 2005-2009 zserghei
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+use Encode qw(encode);
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+
+    package GCPlugins::GCfilms::GCPluginAnimator;
+
+    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+    # start
+    # Opening-tag handler (presumably driven by the HTML parser of the
+    # base class -- confirm in GCfilmsPluginsBase).
+    # In list mode it opens a result entry for every link to a film page;
+    # in item mode it raises the flags read by text() and picks the image.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+        $self->{inside}->{$tagname}++;
+        if ($self->{parsingList})
+        {
+            if ($tagname eq "a")
+            {
+                my $url = $attr->{href};
+                if ($url =~ m/\/db\/\?p\=show\_film/)
+                {
+                    $self->{isMovie} = 1;
+                    $self->{itemIdx}++;
+                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
+                }
+            }
+        }
+        else
+        {
+            if ($tagname eq "td" && $attr->{class} eq "FilmName")
+            {
+                $self->{insideTitle} = 1;
+            }
+            elsif ($tagname eq "td" && $attr->{class} eq "FilmType")
+            {
+                # The same cell is searched for both the duration and the
+                # year, see text().
+                $self->{insideTime} = 1;
+                $self->{insideDate} = 1;
+            }
+            elsif ($tagname eq "td" && $attr->{class} eq "FilmComments")
+            {
+                $self->{insideSynopsis} = 1;
+            }
+            elsif ($tagname eq "img")
+            {
+                # First image that is either the "SlideShow" one or has a
+                # width containing 3 followed by two digits.
+                $self->{curInfo}->{image} = $attr->{src}
+                  if !$self->{curInfo}->{image}
+                  && ($attr->{id} eq "SlideShow" || $attr->{width} =~ m/3\d{2}/);
+                # Paths starting with "/" or "../" are prefixed with the
+                # site address.
+                $self->{curInfo}->{image} = "http://www.animator.ru/" . $self->{curInfo}->{image}
+                  if $self->{curInfo}->{image} =~ m/^\//;
+                $self->{curInfo}->{image} = "http://www.animator.ru/db/" . $self->{curInfo}->{image}
+                  if $self->{curInfo}->{image} =~ m/^\.\.\//;
+                $self->{insideImage} = 0;
+            }
+        }
+    }
+
+    # text
+    # Text handler: $origtext is the text found between two tags.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+        if ($self->{parsingList})
+        {
+            if ($self->{isMovie})
+            {
+                my ($title, $date);
+                # Link texts shaped like: «Title» (1975 ...) are split
+                # into title and year; anything else is used whole as the
+                # title.
+                if ($origtext =~ m/«(.*)»\s\(([0-9]*)\s.+\)/)
+                {
+                    ($title, $date) = ($1, $2);
+                    $self->{itemsList}[ $self->{itemIdx} ]->{title} = $title;
+                    $self->{itemsList}[ $self->{itemIdx} ]->{date}  = $date;
+                }
+                else
+                {
+                    $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
+                }
+                $self->{isMovie} = 0;
+                return;
+            }
+        }
+        else
+        {
+            utf8::decode($origtext);
+            $origtext =~ s/^\s+//;
+            $origtext =~ s/\s+$//;
+            if ($self->{insideTitle})
+            {
+                # One non-word character is stripped from each end, then
+                # the title is lower-cased with a capital first letter.
+                $origtext =~ s/^\W//;
+                $origtext =~ s/\W$//;
+                $origtext = ucfirst(lc($origtext));
+                $self->{curInfo}->{title} = $origtext;
+                # Genre and audio language are constants for this site.
+                $self->{curInfo}->{genre} = "Мультфильм";
+                $self->{curInfo}->{audio} = "русский";
+                $self->{insideTitle} = 0;
+            }
+            elsif ($self->{insideDate})
+            {
+                if ($origtext =~ m/([0-9]+)\sг/)
+                {
+                    $self->{curInfo}->{date} = $1;
+                    # The country is derived from the year alone.
+                    if ($self->{curInfo}->{date} < 1992)
+                    {
+                        $self->{curInfo}->{country} = "СССР";
+                    }
+                    else
+                    {
+                        $self->{curInfo}->{country} = "Россия";
+                    }
+                    $self->{insideDate} = 0;
+                }
+            }
+            elsif ($self->{insideDirector})
+            {
+                $self->{curInfo}->{director} = $origtext;
+                $self->{insideDirector} = 0;
+            }
+            elsif ($self->{insideSynopsis})
+            {
+                # Later chunks are appended on a new line.
+                $self->{curInfo}->{synopsis} .=
+                  $self->{curInfo}->{synopsis} ? "\n" . $origtext : $origtext;
+                $self->{insideSynopsis} = 0;
+            }
+            # Not part of the chain above: the duration is looked for in
+            # every text until it has been found.
+            if ($self->{insideTime})
+            {
+                if ($origtext =~ m/,\s+([0-9]+)\s+мин/)
+                {
+                    $self->{curInfo}->{time} = $1;
+                    $self->{insideTime} = 0;
+                }
+            }
+            # A cell containing the word for "director" announces that
+            # the next text is the director name.
+            if ($self->{inside}->{td})
+            {
+                $self->{insideDirector} = 1 if $origtext =~ m/режиссер/;
+            }
+        }
+    }
+
+    # end
+    # Closing-tag handler: decrements the nesting counter of $tagname.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+        $self->{inside}->{$tagname}--;
+    }
+
+    # new
+    # Constructor: builds the object through the parent class, fills the
+    # hasField table (title and date enabled, director and actors
+    # disabled) and resets the parsing state.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless($self, $class);
+
+        # NOTE(review): presumably the fields available in the results
+        # list -- confirm against the base class.
+        $self->{hasField} = {
+            title    => 1,
+            date     => 1,
+            director => 0,
+            actors   => 0,
+        };
+
+        $self->{isInfo}  = 0;
+        $self->{isMovie} = 0;
+        $self->{curName} = undef;
+        $self->{curUrl}  = undef;
+
+        return $self;
+    }
+
+    # getName
+    # Name of the plugin.
+    sub getName
+    {
+        return "Animator";
+    }
+
+    # getAuthor
+    # Author of the plugin.
+    sub getAuthor
+    {
+        return 'zserghei';
+    }
+
+    # getLang
+    # Language code of the site.
+    sub getLang
+    {
+        return 'RU';
+    }
+
+    # getCharset
+    # Character set reported for the pages of the site.
+    sub getCharset
+    {
+        my $self = shift;
+        return "KOI8-R";
+#        return "Windows-1251";
+    }
+
+    # getSearchUrl
+    # Returns the search URL for $word; $word is inserted as received.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+        return "http://www.animator.ru/db/?p=search&text=$word";
+    }
+
+    # getItemUrl
+    # Prefixes the $url stored by start() with the site address.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+        return "http://www.animator.ru/" . $url;
+    }
+
+    # preProcess
+    # Clears the parsingEnded flag and remaps every Cyrillic letter of
+    # $html through the fixed table below, then returns the page.
+    # NOTE(review): the table looks like a repair for pages decoded with
+    # the wrong Cyrillic charset (see getCharset) -- confirm.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+        $self->{parsingEnded} = 0;
+        $html =~ tr
+          {АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя}
+          {юабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ};
+        return $html;
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAnimeNfoA.pm b/lib/gcstar/GCPlugins/GCfilms/GCAnimeNfoA.pm
new file mode 100644
index 0000000..9347637
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCAnimeNfoA.pm
@@ -0,0 +1,266 @@
+package GCPlugins::GCfilms::GCAnimeNfoA;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+
+    package GCPlugins::GCfilms::GCPluginAnimeNfoA;
+
+    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+    # start
+    # Opening-tag handler (presumably driven by the HTML parser of the
+    # base class -- confirm in GCfilmsPluginsBase).
+    # In list mode it opens a result entry for every link to a title page;
+    # in item mode it raises the flags read by text() and stores the image.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            if ($tagname eq "a")
+            {
+                if ($attr->{href} =~ m/animetitle,[0-9]*,[a-z]*,[a-z0-9_]*\.html/)
+                {
+                    $self->{isMovie} = 1;
+                    $self->{isInfo}  = 1;
+                    $self->{itemIdx}++;
+                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href};
+                }
+            }
+            elsif ($tagname eq "td")
+            {
+                if ($attr->{class} eq "anime_info")
+                {
+                    $self->{couldBeYear} = 1;
+                }
+            }
+        }
+        else
+        {
+            if ($tagname eq 'table')
+            {
+                if ($attr->{class} eq 'anime_info')
+                {
+                    $self->{insideInfos} = 1;
+                }
+            }
+            elsif ($tagname eq 'img')
+            {
+                if ($attr->{class} eq 'float')
+                {
+                    $self->{curInfo}->{image} = 'http://www.animenfo.com/' . $attr->{src};
+                }
+            }
+            elsif ($tagname eq 'a')
+            {
+                if ($attr->{href} =~ /animebygenre\.php\?genre=/)
+                {
+                    $self->{insideGenre} = 1;
+                }
+            }
+        }
+    }
+
+    # end
+    # Closing-tag handler: decrements the nesting counter of $tagname.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        $self->{inside}->{$tagname}--;
+    }
+
+    # text
+    # Text handler: $origtext is the text found between two tags.
+    # In item mode a label ("Title", "Director", ...) raises a flag and the
+    # following text is stored in the matching field.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        # Texts shorter than two characters are ignored unless they end
+        # with a digit.
+        return if (length($origtext) < 2) && ($origtext !~ /\d+$/);
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isMovie})
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext;
+                $self->{isMovie} = 0;
+                $self->{isInfo}  = 1;
+                return;
+            }
+            elsif ($self->{couldBeYear})
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $origtext if $origtext =~ m/^[0-9]{4}$/;
+                $self->{couldBeYear} = 0;
+                return;
+            }
+        }
+        else
+        {
+            if ($self->{insideInfos})
+            {
+                if ($origtext eq "Title")
+                {
+                    $self->{insideName} = 1;
+                }
+                elsif ($origtext eq "Japanese Title")
+                {
+                    $self->{insideOrig} = 1;
+                }
+                elsif ($origtext eq "Total Episodes")
+                {
+                    $self->{insideTime} = 1;
+                }
+                elsif ($origtext eq "Year Published")
+                {
+                    $self->{insideDate} = 1;
+                }
+                elsif ($origtext eq "Director")
+                {
+                    $self->{insideDirector} = 1;
+                }
+                elsif ($origtext eq "User Rating")
+                {
+                    $self->{insideRating} = 1;
+                }
+                elsif ($origtext =~ m/Description/)
+                {
+                    $self->{insideSynopsis} = 1;
+                }
+                elsif ($self->{insideName})
+                {
+                    $self->{curInfo}->{title} = $origtext;
+                    $self->{insideName} = 0;
+                }
+                elsif ($self->{insideOrig})
+                {
+                    # "Official Site" is the next label: it shows up here
+                    # when the value is missing.
+                    $self->{curInfo}->{original} = $origtext if $origtext ne "Official Site";
+                    $self->{insideOrig} = 0;
+                }
+                elsif ($self->{insideTime})
+                {
+                    $origtext =~ s/^(\d+)\s*(.*)/$1 episodes $2/;
+                    $self->{curInfo}->{time} .= $origtext;
+                    $self->{insideTime} = 0;
+                }
+                elsif ($self->{insideGenre})
+                {
+                    $self->{curInfo}->{genre} .= $origtext . ',';
+                    $self->{insideGenre} = 0;
+                }
+                elsif ($self->{insideDate})
+                {
+                    $self->{curInfo}->{date} = $origtext if $origtext =~ m/[0-9]{4}/;
+                    $self->{insideDate} = 0;
+                }
+                elsif ($self->{insideDirector})
+                {
+                    # Same idea as above with the "US Distribution" label.
+                    $self->{curInfo}->{director} = $origtext if $origtext ne "US Distribution";
+                    $self->{insideDirector} = 0;
+                }
+                elsif ($self->{insideRating})
+                {
+                    # $1 is only meaningful after a successful match: when
+                    # the text is not an "x.y/10.0" rating, no rating is
+                    # stored instead of one built from a stale capture.
+                    if ($origtext =~ m|([\d\.]+)/10\.0|)
+                    {
+                        $self->{curInfo}->{ratingpress} = int ($1 + 0.5);
+                    }
+                    $self->{insideRating} = 0;
+                }
+                elsif ($self->{insideSynopsis})
+                {
+                    $self->{curInfo}->{synopsis} = $origtext if !$self->{curInfo}->{synopsis};
+                    $self->{insideSynopsis} = 0;
+                }
+            }
+        }
+    }
+
+    # new
+    # Constructor: builds the object through the parent class, fills the
+    # hasField table (title and date enabled, director and actors
+    # disabled) and resets the parsing state.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless($self, $class);
+
+        # NOTE(review): presumably the fields available in the results
+        # list -- confirm against the base class.
+        $self->{hasField} = {
+            title    => 1,
+            date     => 1,
+            director => 0,
+            actors   => 0,
+        };
+
+        $self->{isInfo}  = 0;
+        $self->{isMovie} = 0;
+        $self->{curName} = undef;
+        $self->{curUrl}  = undef;
+
+        return $self;
+    }
+
+    # preProcess
+    # Rewrites the raw page ($html) and returns the result: <br /> becomes
+    # a newline, scripts, <i> markup and tabs are dropped, genre links are
+    # replaced by their text and the first year link by a <font> element.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        $html =~ s/<br \/>/\n/g;
+        $html =~ s/<script language='JavaScript'>.*?<\/script>//g;
+        $html =~ s|<i>([^<]*)</i>|$1|g;
+        $html =~ s|\t||g;
+        $html =~ s/<a onMouseOut='[^']*' onMouseOver='[^']*' href='animebygenre\.php\?genre=[0-9]*'>([^<]*)<\/a>/$1/g;
+        $html =~ s/<a href='animebyyear\.php\?year=[0-9]{4}'>([0-9]{4})<\/a>/<font class='DefaultFont'>$1<\/font>/;
+
+        return $html;
+    }
+
+    # getSearchUrl
+    # Returns the search URL for $word; $word is inserted as received.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        return "http://www.animenfo.com/search.php?option=keywords&queryin=anime_titles&query=$word";
+    }
+
+    # getItemUrl
+    # Turns the relative $url stored by start() into an absolute one.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return 'http://www.animenfo.com/' . $url;
+    }
+
+    # getName
+    # Name of the plugin.
+    sub getName
+    {
+        return "AnimeNfo Anime";
+    }
+
+    # getAuthor
+    # Author of the plugin.
+    sub getAuthor
+    {
+        return 'MeV';
+    }
+
+    # getLang
+    # Language code of the site.
+    sub getLang
+    {
+        return 'EN';
+    }
+
+    # getNotConverted
+    # Returns an array ref holding the single name 'orig'.
+    # NOTE(review): presumably the fields left out of charset conversion
+    # -- confirm against the base class.
+    sub getNotConverted
+    {
+        my $self = shift;
+        return ['orig'];
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAnimeka.pm b/lib/gcstar/GCPlugins/GCfilms/GCAnimeka.pm
new file mode 100644
index 0000000..f4e3c77
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCAnimeka.pm
@@ -0,0 +1,295 @@
+package GCPlugins::GCfilms::GCAnimeka;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+###################################
+#                                 #
+#     Plugin submitted by MeV     #
+#                                 #
+###################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+    package GCPlugins::GCfilms::GCPluginAnimeka;
+
+    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+    # start
+    # Opening-tag handler (presumably driven by the HTML parser of the
+    # base class -- confirm in GCfilmsPluginsBase).
+    # In list mode it opens a result entry for every link to a detail
+    # page; in item mode it stores image and countries and raises the
+    # flags read by text().
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        # Once the list parsing was stopped (see below), the action of the
+        # "form_note_serie" form is kept as the single result, unless a
+        # URL was already stored.
+        if ($self->{parsingEnded})
+        {
+            if (($tagname eq 'form')
+                && ($attr->{name} eq 'form_note_serie')
+                && (! $self->{itemsList}[0]->{url}))
+            {
+                $self->{itemIdx} = 0;
+                $self->{itemsList}[0]->{url} = $attr->{action};
+            }
+            return;
+        }
+
+        if ($self->{parsingList})
+        {
+            # The index image tells whether this is a results page: any
+            # other picture than rechercheindex.gif stops the list parsing.
+            if (($tagname eq "img")
+                && ($attr->{class} eq "rechercheindeximg")
+                && ($attr->{alt} eq "Animesindex"))
+            {
+                $self->{parsingEnded} = 1 if $attr->{src} !~ /rechercheindex\.gif/;
+            }
+
+            if ($tagname eq "a")
+            {
+                if (($attr->{href} =~ /^\/animes\/detail\//))
+                {
+                    my $url = $attr->{href};
+                    $self->{isMovie} = 1;
+                    $self->{isInfo} = 1;
+                    $self->{itemIdx}++;
+                    $self->{itemsList}[$self->{itemIdx}]->{url} = $url;
+                }
+            }
+        }
+        else
+        {
+            if ($tagname eq "img")
+            {
+                if ($attr->{class} eq "picture")
+                {
+                    $self->{curInfo}->{image} = "http://animeka.com" . $attr->{src};
+                }
+                elsif (($attr->{class} eq "animeslegendimg")
+                    && ($attr->{src} =~ /^\/_distiller\/show_flag\.php\?id=/))
+                {
+                    # Each flag picture names a country in its alt text.
+                    if (!$self->{curInfo}->{country})
+                    {
+                        $self->{curInfo}->{country} = $attr->{alt};
+                    }
+                    # Plain substring test: the alt text is data, not a
+                    # pattern. Used as a regex, metacharacters in it would
+                    # break the match, and an empty one would silently
+                    # reuse the last successful pattern.
+                    elsif (index($self->{curInfo}->{country}, $attr->{alt}) < 0)
+                    {
+                        $self->{curInfo}->{country} .= ", " . $attr->{alt};
+                    }
+                }
+            }
+            elsif ($tagname eq "td")
+            {
+                $self->{insideInfos} = 1 if $attr->{class} eq "animestxt";
+                $self->{insideName} = 1 if $attr->{class} eq "animestitle";
+            }
+            elsif ($tagname eq "div")
+            {
+                # These two classes are produced by preProcess.
+                $self->{insideSynopsis} = 1 if $attr->{class} eq "synopsis";
+                $self->{insideAlternate} = 1 if $attr->{class} eq "alternate";
+            }
+        }
+    }
+
+    # end
+    # Closing-tag handler: decrements the nesting counter of $tagname.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        $self->{inside}->{$tagname}--;
+    }
+
+    # text
+    # Text handler: $origtext is the text found between two tags.
+    # Texts shorter than two characters are ignored.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        return if ($self->{parsingEnded});
+
+        return if length($origtext) < 2;
+
+        if ($self->{parsingList})
+        {
+            # A script that changes document.location.href gives the
+            # address of the single result.
+            if ($self->{inside}->{script})
+            {
+                if ($origtext =~ /document\.location\.href="(.*?)"/)
+                {
+                    $self->{itemIdx} = 0;
+                    $self->{itemsList}[0]->{url} = $1;
+                }
+                return;
+            }
+
+            if ($self->{isMovie})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{"title"} = $origtext;
+                $self->{isMovie} = 0;
+                $self->{isInfo} = 1;
+                return;
+            }
+            elsif ($self->{isYear})
+            {
+                $origtext =~ s/ : ([0-9]{4}) - [0-9]*\s*[A-Z]*/$1/;
+                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $origtext;
+                $self->{isYear} = 0;
+            }
+            elsif ($self->{inside}->{u})
+            {
+                $self->{isYear} = 1 if $origtext =~ /Ann.e \/ nombre et format/;
+            }
+        }
+        else
+        {
+            $origtext =~ s/\s{2,}//g;
+
+            if ($self->{insideInfos})
+            {
+                # Drop the trailing ", " left by preProcess after the last
+                # unwrapped link.
+                $origtext =~ s/(.*), $/$1/;
+                if ($origtext =~ /TITRE ORIGINAL : (.*)/)
+                {
+                    $self->{curInfo}->{original} = $1;
+                }
+                elsif ($origtext =~ /AUTEUR(?:S)? : (.*)/)
+                {
+                    $self->{curInfo}->{director} = $self->capWord($1);
+                }
+                elsif (($origtext =~ /VOLUMES, TYPE . DUR.E : (.*)/)
+                    || ($origtext =~ /TYPE . DUR.E : (.*)/))
+                {
+                    $self->{curInfo}->{time} = $self->capWord($1);
+                }
+                elsif ($origtext =~ /ANN.E DE PRODUCTION : (.*)/)
+                {
+                    $self->{curInfo}->{date} = $self->capWord($1);
+                }
+                elsif ($origtext =~ /GENRE(?:S)? :/)
+                {
+                    $origtext =~ s/(?:, )|(?: & )/,/g;
+                    # The capture is only used when this second, stricter
+                    # match succeeds.
+                    if ($origtext =~ /GENRE(?:S)? : (.*)/)
+                    {
+                        $self->{curInfo}->{genre} = $self->capWord($1);
+                    }
+                }
+                $self->{insideInfos} = 0;
+            }
+            elsif ($self->{insideName})
+            {
+                # Title optionally followed by " (YYYY)".
+                if ($origtext =~ /(.*?)( \(([0-9]{4})\))?$/)
+                {
+                    $self->{curInfo}->{title} = $1;
+                    # Do not wipe a date already stored when the title
+                    # carries no year.
+                    $self->{curInfo}->{date} = $3 if defined $3;
+                }
+                $self->{insideName} = 0;
+            }
+            elsif ($self->{insideSynopsis})
+            {
+                # Undo the [br] / [endline] markers set by preProcess.
+                $origtext =~ s/\[br\]/\n/g;
+                $origtext =~ s/\[endline\]//g;
+                $self->{curInfo}->{synopsis} = $origtext;
+                $self->{insideSynopsis} = 0;
+            }
+            elsif ($self->{insideAlternate})
+            {
+                $origtext =~ s/\[br\]/\n/g;
+                $origtext =~ s/\[endline\]//g;
+                $self->{curInfo}->{original} = $origtext if ! $self->{curInfo}->{original};
+                $self->{insideAlternate} = 0;
+            }
+        }
+    }
+
+    # new
+    # Constructor: builds the object through the parent class, fills the
+    # hasField table (title and date enabled, director and actors
+    # disabled) and resets the parsing state.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        # NOTE(review): presumably the fields available in the results
+        # list -- confirm against the base class.
+        $self->{hasField} = {
+            title => 1,
+            date => 1,
+            director => 0,
+            actors => 0,
+        };
+
+        $self->{isInfo} = 0;
+        $self->{isMovie} = 0;
+        $self->{curName} = undef;
+        $self->{curUrl} = undef;
+
+        return $self;
+    }
+
+    # preProcess
+    # Rewrites the raw page ($html) and returns the result. Line breaks
+    # are replaced by [br] / [endline] markers so that the synopsis and the
+    # alternate title each fit in one text chunk, wrapped in a <div> that
+    # start() recognises. Also clears the parsingEnded flag.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        $self->{parsingEnded} = 0;
+
+        # Decode the two entities the rest of the parsing relies on. Both
+        # the named and the numeric spelling are accepted, as well as a
+        # literal no-break space.
+        $html =~ s/(?:&(?:nbsp|#160);|\x{A0})/ /g;
+        $html =~ s/&(?:amp|#38);/&/g;
+        $html =~ s/<b>|<\/b>//g;
+        $html =~ s/<i>|<\/i>//g;
+        $html =~ s/<br \/>/\[br\]/g;
+        $html =~ s/\n/\[endline\]/g;
+        $html =~ s/<span style="background:#CBD1DD;">([^<]*)<\/span>/$1/g;
+        $html =~ s/\[<a href="\/animes\/(?:studios|genres|pers)\/.*?\.html">([^<]*)<\/a>\] /$1, /g;
+        $html =~ s/<a href="\/avis\/index.html"[^>]*>([^<]*)<\/a>/$1/g;
+        $html =~ s/<td [^>]*>Synopsis<\/td><\/tr><tr><td [^>]*><table [^>]*><tr><td [^>]*>(.*?)<\/td><\/tr><\/table><\/td>/<div class="synopsis">$1<\/div>/;
+        $html =~ s/<td [^>]*>Titre alternatif<\/td><\/tr><tr><td [^>]*><table [^>]*><tr><td [^>]*>(.*?)<\/td><\/tr><\/table><\/td>/<div class="alternate">$1<\/div>/;
+
+        return $html;
+    }
+
+    # getSearchUrl
+    # Returns the search URL for $word; $word is inserted as received.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        return "http://www.animeka.com/search/index.html?req=$word&zone_series=1&go_search=1&cat=search";
+    }
+
+    # getItemUrl
+    # Prefixes the site-relative $url stored by start() with the host.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return "http://www.animeka.com" . $url;
+    }
+
+    # getName
+    # Name of the plugin.
+    sub getName
+    {
+        return "Animeka.com";
+    }
+
+    # getAuthor
+    # Author of the plugin.
+    sub getAuthor
+    {
+        return 'MeV';
+    }
+
+    # getLang
+    # Language code of the site.
+    sub getLang
+    {
+        return 'FR';
+    }
+
+    # getCharset
+    # Character set reported for the pages of the site.
+    sub getCharset
+    {
+        return "ISO-8859-1";
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCBeyazPerde.pm b/lib/gcstar/GCPlugins/GCfilms/GCBeyazPerde.pm
new file mode 100644
index 0000000..e05ce80
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCBeyazPerde.pm
@@ -0,0 +1,340 @@
+package GCPlugins::GCfilms::GCBeyazPerde;
+
+###################################################
+#
+# Copyright 2007-2009 Zuencap
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+
+    package GCPlugins::GCfilms::GCPluginBeyazPerde;
+
+    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+    # start
+    # Opening-tag handler (presumably driven by the HTML parser of the
+    # base class -- confirm in GCfilmsPluginsBase).
+    # In list mode it opens a result entry for every film link; in item
+    # mode it stores the image and raises the flags read by text().
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            if ($tagname eq "a")
+            {
+                if ($attr->{href} =~ /\/film\// && $attr->{class} eq "turuncucizgisiz_11_px")
+                {
+                    my $url = $attr->{href};
+                    # isMovie is a counter here: text() increments it for
+                    # each text chunk that follows the link.
+                    $self->{isMovie} = 1;
+                    $self->{itemIdx}++;
+                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
+                }
+            }
+        }
+        else
+        {
+            if ($tagname eq "img")
+            {
+                if ($attr->{src} =~ /^\/images\/film\//)
+                {
+                    # Only the first film picture is kept.
+                    $self->{curInfo}->{image} = "http://beyazperde.mynet.com" . $attr->{src}
+                      if !$self->{curInfo}->{image};
+                }
+            }
+            elsif ($tagname eq "td")
+            {
+                # insideSynopsis: 1 = armed by end() after the cast table,
+                # 2 = inside the cell whose text is the synopsis.
+                if ($self->{insideSynopsis} == 1)
+                {
+                    $self->{insideSynopsis} = 2;
+                }
+            }
+            elsif ($tagname eq "h1")
+            {
+                if ($attr->{class} eq "baslik_filmadi31")
+                {
+                    $self->{insideTitle} = 1;
+                }
+            }
+            elsif ($tagname eq "h2")
+            {
+                if ($attr->{class} eq "baslik_filmadi32")
+                {
+                    # 2 marks the original title, see text().
+                    $self->{insideTitle} = 2;
+                }
+            }
+        }
+    }
+
+    # end
+    # Closing-tag handler: decrements the nesting counter of $tagname and,
+    # in item mode, moves the cast / synopsis / time states forward.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        $self->{inside}->{$tagname}--;
+
+        if (!$self->{parsingList})
+        {
+            if ($tagname eq "table")
+            {
+                # The end of the cast table arms the synopsis capture.
+                if ($self->{insideActors})
+                {
+                    $self->{insideActors} = 0;
+                    $self->{insideSynopsis} = 1;
+                }
+                $self->{insideTime} = 0;
+            }
+            elsif ($tagname eq "td")
+            {
+                if ($self->{insideSynopsis} == 2)
+                {
+                    $self->{insideSynopsis} = 0;
+                }
+            }
+        }
+    }
+
+    # text
+    # Text handler: $origtext is the text found between two tags.
+    # Texts shorter than two characters are ignored.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        return if length($origtext) < 2;
+
+        # Decode the double quote and the superscript three (named or
+        # numeric entity, or the literal character), then drop every other
+        # numeric entity.
+        $origtext =~ s/&(?:quot|#34);/"/g;
+        $origtext =~ s/(?:&(?:sup3|#179);|³)/3/g;
+        $origtext =~ s/&#[0-9]*;//g;
+        $origtext =~ s/\n//g;
+
+        if ($self->{parsingList})
+        {
+            # isMovie counts the text chunks seen since the film link:
+            # 1 = title, 2 = year, 5 = "Y:" label, 7 = director,
+            # 9 = actors. It is incremented once per chunk at the end.
+            if ($self->{isMovie} == 0)
+            {
+                return;
+            }
+            elsif ($self->{isMovie} == 1)
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
+            }
+            elsif ($self->{isMovie} == 2)
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1 if $origtext =~ m/\(([0-9]*)\)/;
+            }
+            elsif ($self->{isMovie} == 5)
+            {
+                # One extra step when the "Y:" label is present.
+                if ($origtext eq "Y:")
+                {
+                    $self->{isMovie}++;
+                }
+            }
+            elsif ($self->{isMovie} == 7)
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext;
+            }
+            elsif ($self->{isMovie} == 9)
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $origtext;
+                # -1 so that the increment below leaves the counter at 0
+                # (idle).
+                $self->{isMovie} = -1;
+            }
+
+            $self->{isMovie}++;
+            return;
+        }
+        else
+        {
+            if ($self->{insideGenre} && ($self->{inside}->{a}))
+            {
+                $self->{curInfo}->{genre} = $self->capWord($origtext);
+                $self->{insideGenre} = 0;
+            }
+            elsif ($self->{insideDirector} && ($self->{inside}->{a}))
+            {
+                $self->{curInfo}->{director} = $origtext;
+                $self->{insideDirector} = 0;
+            }
+            elsif ($self->{insideSynopsis} == 2)
+            {
+                ($self->{curInfo}->{synopsis} .= $origtext) =~ s/^\s*//;
+            }
+            elsif ($self->{insideTime})
+            {
+                # insideTime steps: 1 = year link, 2 = country link,
+                # then the text holding the duration ("... dk.").
+                if ($self->{insideTime} == 1)
+                {
+                    if ($self->{inside}->{a})
+                    {
+                        $self->{curInfo}->{date} = $origtext;
+                        $self->{insideTime}++;
+                    }
+                }
+                elsif ($self->{insideTime} == 2)
+                {
+                    if ($self->{inside}->{a})
+                    {
+                        $self->{curInfo}->{country} = $origtext;
+                        $self->{insideTime}++;
+                    }
+                }
+                elsif ($origtext =~ / dk\./)
+                {
+                    $origtext =~ s/.*, (.*) dk\./$1 dk\./;
+                    $self->{curInfo}->{time} = $origtext;
+                    $self->{insideTime} = 0;
+                }
+            }
+            elsif ($self->{insideActors})
+            {
+                if ($self->{inside}->{a})
+                {
+                    push @{$self->{curInfo}->{actors}}, [$origtext]
+                      if ($self->{actorsCounter} <
+                        $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
+                    $self->{actorsCounter}++;
+                }
+                elsif ($self->{inside}->{font} && ($origtext =~ m/\((.*)\)/))
+                {
+                    # As we incremented it above, we have one more chance here to add a role
+                    # Without <= we would skip the role for last actor
+                    # The counter must be positive: a role met before any
+                    # actor would address element -1 of an empty list.
+                    push @{$self->{curInfo}->{actors}->[$self->{actorsCounter}-1]}, $1
+                      if ($self->{actorsCounter} > 0)
+                      && ($self->{actorsCounter} <=
+                        $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
+                }
+            }
+            elsif ($self->{insideOtherTitles})
+            {
+                if ($origtext =~ m/(.*?) \(International.*/)
+                {
+                    $self->{curInfo}->{title} = $1;
+                    $self->{insideOtherTitles} = 0;
+                }
+            }
+            elsif ($self->{insideTitle} == 1)
+            {
+                $self->{curInfo}->{title} = $origtext;
+                $self->{insideTitle} = 0;
+            }
+            elsif ($self->{insideTitle} == 2)
+            {
+                $self->{curInfo}->{original} = $origtext;
+                $self->{insideTitle} = 0;
+            }
+
+            # Labels found in <span> elements announce what the next
+            # texts contain.
+            if ($self->{inside}->{span})
+            {
+                $self->{insideDirector} = 1 if $origtext =~ m/Y\xf6netmen : /;
+                $self->{insideGenre}    = 1 if $origtext eq "T\xfcr : ";
+                $self->{insideTime}     = 1 if $origtext =~ m/Yapım/;
+                $self->{insideActors}   = 1
+                  if $origtext =~ m/Oyuncular/ || $origtext =~ m/Seslendirenler/;
+                if ($origtext =~ m{SinePuan:\s+(\d+\,\d+)\s+})
+                {
+                    # Decimal comma to decimal point, then round to the
+                    # nearest integer.
+                    my $rating = $1;
+                    $rating =~ s/,/./;
+                    $self->{curInfo}->{ratingpress} = int($rating + 0.5);
+                }
+            }
+
+        }
+    }
+
+    # new
+    # Constructor: builds the object through the parent class, fills the
+    # hasField table (all four fields enabled) and resets the parsing
+    # state.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless($self, $class);
+
+        # NOTE(review): presumably the fields available in the results
+        # list -- confirm against the base class.
+        $self->{hasField} = {
+            title    => 1,
+            date     => 1,
+            director => 1,
+            actors   => 1,
+        };
+
+        $self->{isMovie} = 0;
+        $self->{curName} = undef;
+        $self->{curUrl}  = undef;
+
+        return $self;
+    }
+
+    # preProcess
+    # Rewrites the raw page ($html) before it is parsed and returns it.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        #Fix for character-encoding:
+        $html =~ s/\x85/\.\.\./g;
+        $html =~ s/\x92/'/g;
+        $html =~ s/\x93/“/g;
+        $html =~ s/\x94/”/g;
+
+        # A quote entity right next to a real double quote becomes a
+        # single quote. Matching the entity rather than a second literal
+        # quote keeps empty attributes such as alt="" untouched.
+        $html =~ s/"&(?:quot|#34);/'"/g;
+        $html =~ s/&(?:quot|#34);"/"'/g;
+        # No-break spaces (entity or literal character) become plain
+        # spaces.
+        $html =~ s/(?:&(?:nbsp|#160);|\x{A0})/ /g;
+        $html =~ s|</a></b><br>|</a><br>|;
+
+        return $html;
+    }
+
+    # getSearchUrl
+    # Returns the search URL for $word; $word is inserted as received.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+        return "http://beyazperde.mynet.com/arama.asp?kat=film&keyword=$word";
+    }
+
+    # getItemUrl
+    # Returns $url unchanged when it is set, the site home page otherwise.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url if $url;
+        return 'http://beyazperde.mynet.com/';
+    }
+
+    # convertCharset
+    # Returns $value unchanged: no conversion is done by this plugin.
+    sub convertCharset
+    {
+        my ($self, $value) = @_;
+        return $value;
+    }
+
+    # getName
+    # Name of the plugin.
+    sub getName
+    {
+        return "Beyaz Perde";
+    }
+
+    # getAuthor
+    # Author of the plugin.
+    sub getAuthor
+    {
+        return 'Zuencap';
+    }
+
+    # getLang
+    # Language code of the site.
+    sub getLang
+    {
+        return 'TR';
+    }
+
+    # getCharset
+    # Character set reported for the pages of the site.
+    sub getCharset
+    {
+        my $self = shift;
+
+        return "utf-8";
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCCartelesPeliculasES.pm b/lib/gcstar/GCPlugins/GCfilms/GCCartelesPeliculasES.pm
new file mode 100644
index 0000000..e0aff39
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCCartelesPeliculasES.pm
@@ -0,0 +1,351 @@
+package GCPlugins::GCfilms::GCCartelesPeliculasES;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+    package GCPlugins::GCfilms::GCPluginCartelesPeliculasES;
+
+    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+
+    # text
+    # Called each time some plain text (between tags) is processed.
+    # $origtext is the read text.
+ sub text + { + my ($self, $origtext) = @_; + return if length($origtext) < 2; + + # Código para procesar el resultado de la busqueda + if ($self->{parsingList}){ + # Guardamos el título + if ($self->{inside}->{h3} && $self->{insideInfos}){ + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + } + return; + } + # Codigo para el contenido de la ficha + else{ + # Eliminamos espacios iniciales, espacios dobles y espacios finales del texto + $origtext =~ s/^\s*|\s{2,}|\s*$//g; + # Estamos procesando el titulo + if ($self->{insideTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{insideTitle} = 0; + return; + } + # Estamos en la puntuación + if ($self->{inside}->{strong} && ($origtext =~ /[0-5],[0-5][0-5]/)) + { + $self->{curInfo}->{rating} = ($origtext/5)*10; + return; + } + + # Procesamos el titulo original + if ($self->{isOrigTit} eq 1) { + $self->{isOrigTit} = 0; + # Indicamos que en el siguiente paso hay que leer año,pais,duracion + $self->{isOther} = 1; + # Reemplazamos la primera , por # y después obtenemos el texto + $origtext =~ s/,/#/; + $origtext =~ s/#.*//; + $self->{curInfo}->{original} = $origtext; + return; + } + # Procesamos Año, pais, duracion + if ($self->{isOther} eq 1) { + # Comprobamos si tiene el formato de año, pais, duración + if($origtext =~ /^(.*), (.*), (.*)$/){ + $self->{isOther} = 0; + $self->{curInfo}->{date} = $1; + $self->{curInfo}->{country} = $2; + $self->{curInfo}->{time} = $3; + } + return; + } + # Procesamos los directores + if ($self->{isDirector} eq 1) { + $self->{curInfo}->{director} = $origtext; + $self->{isDirector} = 0; + return; + } + # Actores + if ($self->{isActors} eq 1) { + $self->{curInfo}->{actors} = $origtext; + $self->{isActors} = 0; + } + # sinopsis + if ($self->{isSynopsis} eq 1) { + $self->{curInfo}->{synopsis} = $origtext; + $self->{isSynopsis} = 0; + } + + # Condiciones para procesar los campos en el siguiente ciclo + if($self->{inside}->{p}){ + $self->{isOrigTit} = 1 if $origtext eq "akas:"; 
+ $self->{isDirector} = 1 if $origtext eq "Director:"; + $self->{isActors} = 1 if $origtext eq "Intérpretes:"; + $self->{isSynopsis} = 1 if $origtext eq "Sinopsis:"; + return; + } + + } + } + + + # end + # Called each time a HTML tag ends. + # $tagname is the tag name. + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + } + + # In processing functions below, self->{parsingList} can be used. + # If true, we are processing a search results page + # If false, we are processing a item information page. + + # $self->{inside}->{tagname} (with correct value for tagname) can be used to test + # if we are in the corresponding tag. + + # You have a counter $self->{itemIdx} that have to be used when processing search results. + # It is your responsability to increment it! + + # When processing search results, you have to fill the available fields for results + # + # $self->{itemsList}[$self->{movieIdx}]->{field_name} + # + # When processing a movie page, you need to fill the fields (if available) + # in $self->{curInfo}. + # + # $self->{curInfo}->{field_name} + + # start + # Called each time a new HTML tag begins. + # $tagname is the tag name. + # $attr is reference to an associative array of tag attributes. + # $attrseq is an array reference containing all the attributes name. 
+ # $origtext is the tag text as found in source file + # Returns nothing + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + + # Código para procesar el resultado de la busqueda para generar el listado + if ($self->{parsingList}) + { + # Comprobamos si estamos dentro del marcador que inicia la info de un titulo + if( ($tagname eq "h3" ) && ($attr->{class} eq "entry-title" )){ + # Indicamos que tenemos que se puede leer la info e incrementamos el número de resultados + $self->{itemIdx}++; + $self->{insideInfos} = 1 ; + return; + } + # Si estamos en un título y encontramos una tag a, es un enlace a ficha + if ($tagname eq "a" && $self->{insideInfos}){ + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + return; + } + + if(($tagname eq "div") && ($attr->{class} eq "entry-summary" ) && $self->{insideInfos}){ + $self->{insideInfos} = 0; + return; + } + } + # Código para procesar la información de la pelicula seleccionada + else { + if ($tagname eq "h1"){ + $self->{insideTitle} = 1; + return; + } + # Si estamos dentro de una imagen y no se ha asignado ninguna, la asignamos + if (($tagname eq "img") & !$self->{curInfo}->{image}) + { + # Imágenes en cmg: + # Thumb http://www.cartelespeliculas.com/galeria/albums/003/thumbs_23p47303003.jpg + # ./../../galeria/albums/005/thumbs_23p43025005.jpg + # Normal: http://www.cartelespeliculas.com/galeria/albums/003/23p47303003.jpg + # Extraemos la dirección de la imagen a partir del thumb + if ($attr->{src} =~ /\.\/\.\.\/\.\.\/(galeria\/albums\/[0-9]*\/)thumbs_(.*)$/) + { + $self->{curInfo}->{image} = "http://www.cartelespeliculas.com/". $1 .$2; + } + return; + } + } + } + + # preProcess + # Called before each page is processed. You can use it to do some substitutions. + # $html is the page content. + # Returns modified version of page content. 
+ sub preProcess + { + my ($self, $html) = @_; + + # Anulamos el html si coincide con el patron de no resultados + if($html =~ /^.*Lo sentimos, no se ha encontrado.*$/s){ + $html = ""; + return $html; + } + + # Recorta el código del listado de resultados, quedandose solo con la parte que nos interesa del html + # el modificador s/.../$1/s trata el flujo como una sola cadena y reemplaza todo el cuerpo con la parte que nos interesa + if($html =~ s/^.*<ul class="hfeed posts-default clearfix">(.*)\t<\/li>\n\t\t<\/ul>.*$/$1/s){ + return $html; + } + + # Recorta el código de la ficha, quedandose solo con la parte que nos interesa del html + if($html =~ s/^.*<div id="content" class="section">\n\n\n\n\t\t(.*)<\/li>\n<\/ul>\n<\/div>.*$/$1/s){ + return $html; + } + return $html; + } + + # changeUrl + # Can be used to change URL if item URL and the one used to + # extract information are different. + # Return the modified URL. + #sub changeUrl + #{ + # my ($self, $url) = @_; + # return $url; + #} + + # getExtra + # Used if the plugin wants an extra column to be displayed in search results + # Return the column title or empty string to hide the column. + #sub getExtra + #{ + # return 'Extra'; + #} + + + # getLang + # Used to fill in plugin list with user language plugins + # Return the language used for this site (2 letters code). + sub getLang + { + return "ES"; + } + + + # getAuthor + # Used to display the plugin author in GUI. + # Returns the plugin author name. + sub getAuthor + { + return "DoVerMan"; + } + + + # getName + # Used to display plugin name in GUI. + # Returns the plugin name. + sub getName + { + return "CartelesPeliculas"; + } + + + # getCharset + # Used to convert charset in web pages. + # Returns the charset as specified in pages. + #sub getCharset + #{ + # my $self = shift; + # # Charset de la web + # return "UTF-8"; + #} + + + # getItemUrl + # Used to get the full URL of an item page. + # Useful when url on results pages are relative. 
+ # $url is the URL as found with a search. + # Returns the absolute URL. + sub getItemUrl + { + my ($self, $url) = @_; + return $url; + } + + + # getSearchUrl + # Used to get the URL that to be used to perform searches. + # $word is the query + # Returns the full URL. + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.cartelespeliculas.com/wp/?s=" . $word; + } + + + # Constructor + sub new + { + # Inicialización + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + # Campos que devuelve el plugin (1 si, 0 no). Son los que apareceran + # en el listado de resultados + $self->{hasField} = { + title => 1, + date => 0, + director => 0, + actors => 0 + }; + + # Indica si estamos procesando información útil + $self->{insideInfos} = 0; + + # Indican el estado del procesado del listado de resultados + $self->{insideTitle} = 0; + + # Indican el estado del procesado del listado de resultados (0 no procesar, 1 es el siguiente, 2 procesando) + $self->{isOther} = 0; + $self->{isTitle} = 0; + $self->{isOrigTit} = 0; + $self->{isDirector} = 0; + $self->{isActors} = 0; + $self->{isSynopsis} = 0; + + return $self; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCCinemaClock.pm b/lib/gcstar/GCPlugins/GCfilms/GCCinemaClock.pm new file mode 100644 index 0000000..26ce915 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCCinemaClock.pm @@ -0,0 +1,271 @@ +package GCPlugins::GCfilms::GCCinemaClock; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
{

    package GCPlugins::GCfilms::GCPluginCinemaClock;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Parser callback for every opening tag.
    # Search results: links to a movie card open a new entry in
    # $self->{itemsList}; the stored URL is relative to the site root.
    # Movie page: selects the cover image and arms the states consumed by
    # text() (the <div> markers are produced by preProcess).
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                if ($attr->{href} =~
/http\:\/\/www\.CinemaClock\.com\/aw\/crva\.aw\/p\.clock\/r\.que\/m\.Montreal\/j\.f\/i\./
                  )
                {
                    my $url = $attr->{href};
                    $url =~ s/http\:\/\/www\.CinemaClock\.com(.*)/$1/;
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
            }
        }
        else
        {
            if ($tagname eq "img")
            {
                # Once a DVD cover has been stored it must not be replaced.
                # The stored value is an absolute URL, so the path is
                # searched anywhere in it; anchoring the test at the start
                # of the string could never match and made it always true.
                if ($self->{curInfo}->{image} !~ m{/images/dvd/})
                {
                    if ($attr->{src} =~ /^\/images\/dvd\/med\/(.*)\.gif/)
                    {
                        $self->{curInfo}->{image} =
                          "http://www.cinemaclock.com/images/dvd/" . $1 . ".jpg";
                    }
                    elsif ($attr->{src} =~ /^\/images\/dvd\//)
                    {
                        $self->{curInfo}->{image} =
                          "http://www.cinemaclock.com" . $attr->{src};
                    }
                    elsif ($attr->{src} =~ /^\/images\/posters\//)
                    {
                        $self->{curInfo}->{image} =
                          "http://www.cinemaclock.com" . $attr->{src};
                    }
                    elsif ($attr->{src} =~ /^\/images\//)
                    {
                        # Any other image is only a fallback.
                        $self->{curInfo}->{image} =
                          "http://www.cinemaclock.com" . $attr->{src}
                          if !$self->{curInfo}->{image};
                    }
                }
            }
            elsif ($tagname eq "div")
            {
                $self->{insideInfos} = 1 if $attr->{class} eq "informations";
                $self->{insideName}  = 1 if $attr->{class} eq "movietitle";
            }
            elsif ($tagname eq "p")
            {
                $self->{insideSynopsis} = 1 if $attr->{style} eq "text-align: justify";
            }
        }
    }

    # end
    # Parser callback for every closing tag.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # text
    # Parser callback for every text node; nodes shorter than two
    # characters are ignored.
    # Search results: the first text after a movie link is the title, a
    # later "(YYYY)" gives the year.
    # Movie page: "Label: value" texts inside an "informations" block fill
    # $self->{curInfo}.
    sub text
    {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{"title"} =
                  $self->capWord($origtext);
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
                return;
            }
            elsif ($origtext =~ /\(([0-9]{4})\)/)
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
            }
        }
        else
        {
            $origtext =~ s/\s{2,}//g;

            if ($self->{insideInfos})
            {
                # "." stands for the accented letters of the French labels.
                if ($origtext =~ /Ann.e\:.(.*)/)
                {
                    $self->{curInfo}->{date} = $1;
                }
                elsif ($origtext =~ /Pays\:.(.*)/)
                {
                    $self->{curInfo}->{country} = $1;
                }
                elsif ($origtext =~ /Genre\:.(.*)/)
                {
                    $self->{curInfo}->{genre} = $self->capWord($1);
                    $self->{curInfo}->{genre} =~ s/, /,/g;
                }
                elsif ($origtext =~ /Dur.e\:.(.*)/)
                {
                    $self->{curInfo}->{time} = $1;
                }
                elsif ($origtext =~ /R.alis..par\:.(.*)/)
                {
                    $self->{curInfo}->{director} = $1;
                }
                elsif ($origtext =~ /En.vedette\:.(.*)/)
                {
                    $self->{curInfo}->{actors} = $1;
                }
                elsif ($origtext =~ /Classement\:.(.*)/)
                {
                    # "G" maps to 2; a number, when present, wins.
                    $self->{curInfo}->{age} = 2  if $origtext =~ /G/;
                    $self->{curInfo}->{age} = $1 if $origtext =~ /([0-9]+)/;
                }
                elsif ($origtext =~ /Guide.parental\:.(.*)/)
                {
                    # A parental guide raises age code 2 to 5.
                    $self->{curInfo}->{age} = 5 if $self->{curInfo}->{age} == 2;
                }
                $self->{insideInfos} = 0;
            }
            elsif ($self->{insideName})
            {
                $origtext =~ s/"//g;
                $self->{curInfo}->{title} = $origtext;
                $self->{insideName} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                $self->{curInfo}->{synopsis} = $origtext;
                $self->{insideSynopsis} = 0;
            }
            elsif ($origtext =~ /Version fran.aise de(.*)/)
            {
                $self->{curInfo}->{original} = $1;
            }
        }
    }

    # Constructor: builds the base plugin object and declares which columns
    # the search results provide.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Rewrites the page so that start()/text() can rely on simple markers:
    # label/value cell pairs become <div class="informations">, the title
    # span becomes <div class="movietitle">, and bold, font and remaining
    # span tags are removed. Returns the modified HTML.
    sub preProcess
    {
        my ($self, $html) = @_;

        #<<< keep perltidy away
        $html =~ s{<b>|</b>}{}g;
        $html =~ s{<a href="/aw/cpea\.aw/p\.clock/r\.que/m\.Montreal/j\.f/i\.[0-9]*/a\.[^"]*">([^<]*)</a>}
                  {$1}g;
        # The replacement must start with the <div> itself: a leading "/"
        # would be emitted as stray text in front of every block.
        $html =~ s{<span class=arialb2>([^<]*)</span></td>[^<]*<td><span class=arial2>([^<]*)</span>}
                  {<div class="informations">$1$2</div>}g;
        $html =~ s{<span class=movietitle>([^<]*)</span>}
                  {<div class="movietitle">$1</div>};
        $html =~ s{<font color=[^>]*>|</font>|<span class=[^>]*>|</span>}
                  {}g;
        #>>>

        return $html;
    }

    # Returns the search URL for the query $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.cinemaclock.com/aw/csra.aw?"
          . "p=clock&r=que&m=Montreal&j=f&key=$word";
    }

    # Returns the absolute URL for the relative $url stored by start().
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return "http://www.cinemaclock.com" . $url;
    }

    # Plugin name.
    sub getName
    {
        return "CinemaClock.com";
    }

    # Plugin author.
    sub getAuthor
    {
        return 'MeV';
    }

    # Language code of the site.
    sub getLang
    {
        return 'FR';
    }

}
{
    package GCPlugins::GCfilms::GCPluginCinemotions;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Parser callback for every opening tag.
    # Search results: "link4" links to a movie card open a new entry; a
    # poster image cancels the entry opened by the link around it.
    # Movie page: selects the poster and arms the states consumed by text().
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq 'a')
            {
                if (($attr->{href} =~ /^\/modules\/Films\/fiche\//)
                    && ($attr->{class} eq "link4"))
                {
                    my $url = $attr->{href};
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[$self->{itemIdx}]->{url} = $url;
                }
            }
            elsif (($tagname eq 'img') && (($attr->{src} =~ /^\/data\/films\//)
                || ($attr->{src} =~ /^\/modules\/Films\/img\/webpasdaffiche\.jpg/)))
            {
                # Undo the entry counted for the enclosing link.
                $self->{isMovie} = 0;
                $self->{itemIdx}--;
            }
            elsif ($tagname eq 'font')
            {
                if ($attr->{class} eq 'link4dtext')
                {
                    $self->{isInfo} = 1;
                }
            }
        }
        else
        {
            if ($tagname eq 'img')
            {
                if (($attr->{src} =~ m|/data/films/|)
                    && ($attr->{src} !~ m|/data/films/[^_]*_[0-9]{4}_[0-9]*\.jpg|)
                    && ($attr->{width} == 150))
                {
                    $self->{curInfo}->{image} = $attr->{src};
                    if ($self->{bigPics})
                    {
                        # Drop the "h200" path element from the URL.
                        $self->{curInfo}->{image} =~ s/\/h200\//\//;
                    }
                }
            }
            elsif ($tagname eq 'font')
            {
                $self->{insideOrig}     = 1 if $attr->{class} eq 'titrevo_film';
                $self->{insideInfos}    = 1 if ($attr->{face} eq 'arial')
                                            && ($attr->{size} eq '2');
                $self->{insideArtists}  = 1 if ($attr->{face} eq 'verdana,geneva,arial')
                                            && ($attr->{size} eq '2');
                $self->{insideSynopsis} = 1 if ($attr->{class} eq 'link6')
                                            && ($self->{inside}->{fieldset})
                                            && (!$self->{curInfo}->{synopsis});
            }
            elsif ($tagname eq 'h2')
            {
                $self->{insideOrig} = 1 if $attr->{style} eq 'color: #333333; font-size:13px';
            }
            elsif ($tagname eq 'br')
            {
                if ($self->{insideSynopsis})
                {
                    $self->{curInfo}->{synopsis} .= "\n";
                }
            }
        }
    }

    # end
    # Parser callback for every closing tag; a closing </font> ends the
    # synopsis.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
        $self->{insideSynopsis} = 0 if $tagname eq 'font';

    }

    # text
    # Parser callback for every text node; nodes shorter than two
    # characters are ignored.
    # Search results: fills title, date, director and actors of the current
    # item. Movie page: fills $self->{curInfo}.
    sub text
    {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;
        $origtext =~ s/\s{2,}//g;
        $origtext =~ s/\n+//g if !$self->{insideSynopsis};

        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                if (($self->{inside}->{h1}) || ($self->{inside}->{h2}))
                {
                    $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
                    $self->{isMovie} = 0;
                    $self->{isInfo}  = 1;
                    return;
                }
            }
            elsif ($self->{isInfo})
            {
                if (($origtext =~ /([0-9]{4}) - [0-9]*h[0-9]*/)
                    || ($origtext =~ /([0-9]{4}) - [0-9]* mn/))
                {
                    $self->{itemsList}[$self->{itemIdx}]->{date} = $1;
                }
                elsif ($origtext =~ /^\s*R.alisation : (.*)/)
                {
                    $self->{itemsList}[$self->{itemIdx}]->{director} = $1;
                }
                elsif ($origtext =~ /^\s*avec (.*)/)
                {
                    # The cast line is the last piece of a result.
                    $self->{itemsList}[$self->{itemIdx}]->{actors} = $1;
                    $self->{isInfo} = 0;
                }
            }
        }
        else
        {
            if ($self->{inside}->{h1}
                && !$self->{curInfo}->{title})
            {
                $self->{curInfo}->{title} = $origtext;
            }
            elsif ($self->{insideOrig})
            {
                $self->{curInfo}->{original} = $origtext
                    if !$self->{curInfo}->{original};
                $self->{insideOrig} = 0;
            }
            if ($self->{insideInfos})
            {
                # "year- country[/country]- genre[/genre][- time]"
                if ($origtext =~ /([0-9]{4})- (.*?)- ([^-]*)(?:- (.*))?/)
                {
                    my ($date, $nat, $type, $time) = ($1, $2, $3, $4);
                    $nat  =~ s|/|, |g;
                    $type =~ s|/|,|g;

                    $self->{curInfo}->{date}    = $date;
                    $self->{curInfo}->{country} = $nat;
                    $self->{curInfo}->{genre}   = $type;
                    $self->{curInfo}->{time}    = $time;
                }
                $self->{insideInfos} = 0;
            }
            elsif ($self->{insideArtists})
            {
                if ($origtext =~ /R.alisation\s*:\s*(.*)/)
                {
                    $self->{curInfo}->{director} = $1 if !$self->{curInfo}->{director};
                }
                elsif ($origtext =~ /avec\s*:?\s*(.*)/i)
                {
                    if (!$self->{curInfo}->{actors})
                    {
                        $self->{curInfo}->{actors} = $1;

                        # "Name (Role)" becomes "Name;Role".
                        $self->{curInfo}->{actors} =~ s/\s*\(([^\)]*)\)\s*/;$1/g;
                    }
                }
                $self->{insideArtists} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                $self->{curInfo}->{synopsis} .= $origtext;
            }
        }
    }

    # Constructor: builds the base plugin object and declares which columns
    # the search results provide.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 1,
            actors   => 1,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Simplifies the page before parsing (drops comments, bold tags and
    # artist links, closes the result description font) and replaces some
    # characters by plain equivalents. Returns the modified HTML.
    sub preProcess
    {
        my ($self, $html) = @_;
        $html =~ s/<!--[^-]*-->//g;
        $html =~ s/<b>|<\/b>//g;

        # NOTE(review): this pattern and the literal characters further down
        # look like HTML entities that were decoded by the page this text
        # was taken from - confirm against the upstream file.
        $html =~ s/ / /g;

        # NOTE(review): removes the apostrophe together with the backslash;
        # confirm that dropping it (rather than unescaping) is intended.
        $html =~ s/\\'//g;
        $html =~ s|<A HREF="/modules/Artistes/fiche/[0-9]*[^>]*>(.*?)</A>|$1|gi;
        $html =~ s/<font class=link_news_2>([^<]*)<\/font>/$1/gi;
        $html =~ s/<font class=link4dtext>([^<]*)<br>([^<]*)<\/TD>/<font class=link4dtext>$1 $2<\/font><\/TD>/gi;
        $html =~ s|<h1>Oops\!</h1>||gi;

        $html =~ s|\x{92}|'|gi;
        $html =~ s|’|'|gi;
        $html =~ s|•|*|gi;
        $html =~ s|œ|oe|gi;
        $html =~ s|…|...|gi;
        $html =~ s|\x{85}|...|gi;
        $html =~ s|\x{8C}|OE|gi;
        $html =~ s|\x{9C}|oe|gi;

        return $html;
    }

    # Returns the search URL for the query $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.cinemotions.com/recherche/$word.html";
    }

    # Returns the absolute URL for the relative $url stored by start().
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return 'http://www.cinemotions.com' . $url;
    }

    # Plugin name.
    sub getName
    {
        return 'Cinemotions.com';
    }

    # Plugin author.
    sub getAuthor
    {
        return 'MeV';
    }

    # Language code of the site.
    sub getLang
    {
        return 'FR';
    }

}
+# The package name must exactly match the file name (.pm) +package GCPlugins::GCfilms::GCCsfd; + +################################################### +# +# Copyright 2005-2009 Tian +# Copyright 2007,2011 Petr Gajdůšek <gajdusek.petr@centrum.cz> +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +#use warnings; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + # Replace SiteTemplate with your exporter name + # It must be the same name as the one used for file and main package name + package GCPlugins::GCfilms::GCPluginCsfd; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + # getSearchCharset + # Charset of search term + sub getSearchCharset + { + return 'UTF-8'; + } + + # getSearchUrl + # Used to get the URL that to be used to perform searches. + # $word is the query + # Returns the full URL. + sub getSearchUrl + { + my ($self, $word) = @_; + return "http://www.csfd.cz/hledat/?q=$word"; + } + + # getItemUrl + # Used to get the full URL of a movie page. + # Useful when url on results pages are relative. + # $url is the URL as found with a search. + # Returns the absolute URL. + sub getItemUrl + { + my ($self, $url) = @_; + + $url = "http://www.csfd.cz" . 
$url if ($url !~ /^http:/); + return $url; + } + + # getCharset + # Used to convert charset in web pages. + # Returns the charset as specified in pages. + #sub getCharset { + # my $self = shift; + # + # return "UTF-8"; + #} + + # getName + # Used to display plugin name in GUI. + # Returns the plugin name. + sub getName + { + return "CSFD.cz"; + } + + # getAuthor + # Used to display the plugin author in GUI. + # Returns the plugin author name. + sub getAuthor + { + return 'Petr Gajdůšek'; + } + + # getLang + # Used to fill in plugin list with user language plugins + # Return the language used for this site (2 letters code). + sub getLang + { + return 'CS'; + } + + # hasSearchYear + # Used to hide year column in search results + # Return 0 to hide column, 1 to show it. + sub hasSearchYear + { + return 1; + } + + # hasSearchDirector + # Used to hide director column in search results + # Return 0 to hide column, 1 to show it. + sub hasSearchDirector + { + return 1; + } + + # hasSearchActors + # Used to hide actors column in search results + # Return 0 to hide column, 1 to show it. + sub hasSearchActors + { + return 1; + } + + # getExtra + # Used if the plugin wants an extra column to be displayed in search results + # Return the column title or empty string to hide the column. + sub getExtra + { + + return 'Žánr'; + } + + # changeUrl + # Can be used to change URL if movie URL and the one used to + # extract information are different. + # Return the modified URL. + sub changeUrl + { + my ($self, $url) = @_; + + return $url; + } + + # preProcess + # Called before each page is processed. You can use it to do some substitutions. + # $html is the page content. + # Returns modified version of page content. 
+ sub preProcess + { + my ($self, $html) = @_; + $self->{parsingEnded} = 0; + if ($self->{parsingList}) + { + # Search results + + # Initial values for search results parsing + # There are two movies list: + # First with detailed info (title, genre, origin country, year, directors, actors) + # Second with brief list of other movies (title, year) + + # We are in brief list containing other movies without details + $self->{insideOtherMovies} = 0; + # Movie link; movie's details follow if not in brief list + $self->{isMovie} = 0; + + ## Details: + + # Movie's details will follow: Genre, origin, actors, directors, year + $self->{insideDetails} = 0; + # In movie's details after paragraph with Genre, origin and date + $self->{wasDetailsInfo} = 0; + # In movie's details: directors and actors + $self->{directors} = (); + $self->{directorsCounter} = 0; + $self->{actors} = (); + $self->{actorsCounter} = 0; + $self->{insideDirectors} = 0; + $self->{insideActors} = 0; + + # Movie year + $self->{isYear} = 0; + + ## Preprocess + + # directors and actors + $html =~ s/\n\s*Režie:\s([^\n]*)/<div class="directors">$1<\/div>/g; + $html =~ s/\n\s*Hrají:\s([^\n].*)/<div class="actors">$1<\/div>/g; + # year + $html =~ s/<span class="film-year">\(([0-9]+)\)<\/span>/<span class="film-year">$1<\/span>/g; + } + else + { + # Movie page + + # Initial values for search results parsing + + # array containg other movie titles (not exported to GCStar) + $self->{titles} = (); + # in list containing other movie titles + $self->{isTitles} = 0; + # in the original title (title for same country as movie's origin) + $self->{isOrigTitle} = 0; + # original title (if not set during parsing it will be set to main title at the end) + $self->{origTitle} = undef; + $self->{titlesCounter} = 0; + + $self->{insideGenre} = 0; + + $self->{awaitingSynopsis} = 0; + $self->{insideSynopsis} = 0; + + # inside details with country, date (year) and time (length) + $self->{insideInfo} = 0; + + $self->{insideRating} = 0; + 
+ # User comments + # Each comment consists of commenter (user) and his comment + + $self->{insideCommentAuthor} = 0; + $self->{awaitingComment} = 0; + $self->{insideComment} = 0; + + # In directors and actors + $self->{insideDirectors} = 0; + $self->{insideActors} = 0; + $self->{directors} = (); + $self->{directorsCounter} = 0; + $self->{actors} = (); + $self->{actorsCounter} = 0; + + ## Preprocess + + # removee <br /> and <br> + $html =~ s/<br( \/)?>/\n/g; + ## Synopsis + # remove list bullet + $html =~ s/<img src="http:\/\/img.csfd.cz\/sites\/web\/images\/common\/li.gif"[^>]*>//g; + # remove hyperlink to user profile + $html =~ s/( <span class="source[^\(]*\()<a[^>]*>([^<]*)<\/a>/$1uživatel $2/g; + # remove <span></span> around synopsis source + $html =~ s/ <span class="source[^\(]*\(([^\)]*)\)<\/span>/\n-- $1/g; + $html =~ s/<div data-truncate="570">([^<]*)<\/div>/$1/g; + } + return $html; + } + + # In processing functions below, self->{parsingList} can be used. + # If true, we are processing a search results page + # If false, we are processing a movie information page. + + # $self->{inside}->{tagname} (with correct value for tagname) can be used to test + # if we are in the corresponding tag. + + # You have a counter $self->{movieIdx} that have to be used when processing search results. + # It is your responsability to increment it! + + # When processing search results, you have to fill (if available) following fields: + # + # $self->{movieList}[$self->{movieIdx}]->{title} + # $self->{movieList}[$self->{movieIdx}]->{url} + # $self->{movieList}[$self->{movieIdx}]->{actors} + # $self->{movieList}[$self->{movieIdx}]->{director} + # $self->{movieList}[$self->{movieIdx}]->{date} + # $self->{movieList}[$self->{movieIdx}]->{extra} + + # When processing a movie page, you need to fill the fields (if available) in $self->{curInfo}. 
# They are:
#
#  $self->{curInfo}->{title}
#  $self->{curInfo}->{director}
#  $self->{curInfo}->{original}     (Original title)
#  $self->{curInfo}->{actors}
#  $self->{curInfo}->{genre}        (Comma separated list of movie type)
#  $self->{curInfo}->{country}      (Movie Nationality or country)
#  $self->{curInfo}->{date}
#  $self->{curInfo}->{time}
#  $self->{curInfo}->{synopsis}
#  $self->{curInfo}->{image}
#  $self->{curInfo}->{audio}
#  $self->{curInfo}->{subt}
#  $self->{curInfo}->{age}          0     : No information
#                                   1     : Unrated
#                                   2     : All audience
#                                   5     : Parental Guidance
#                                   >= 10 : Minimum age value

# start
# Called each time a new HTML tag begins.
# $tagname is the tag name.
# $attr is reference to an associative array of tag attributes.
# $attrseq is an array reference containing all the attributes name.
# $origtext is the tag text as found in source file
# Returns nothing
sub start
{
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

    # The nesting counter is maintained even once parsing has ended
    $self->{inside}->{$tagname}++;

    return if $self->{parsingEnded};

    # A missing class attribute compares exactly like an empty string
    my $cssClass = defined $attr->{class} ? $attr->{class} : '';

    if ($self->{parsingList})
    {
        if ($tagname eq "ul")
        {
            # brief list of other movies (without details) starts here
            $self->{insideOtherMovies} = 1 if $cssClass eq "films others";
        }
        elsif ($tagname eq "a")
        {
            # link to a movie page: open a new entry in the result list
            if ($attr->{href} =~ m/\/film\/[0-9]+-.*/)
            {
                $self->{isMovie} = 1;
                $self->{itemIdx}++;
                $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href};
                $self->{insideDetails} = 1 if ($self->{insideOtherMovies} != 1);
                $self->{wasDetailsInfo} = 0;
            }
        }
        elsif ($tagname eq "div")
        {
            # markers for directors and actors
            $self->{insideDirectors} = 1 if $cssClass eq "directors";
            $self->{insideActors}    = 1 if $cssClass eq "actors";
        }
        elsif ($tagname eq "span")
        {
            # year
            $self->{isYear} = 1 if $cssClass eq "film-year";
        }
        return;
    }

    # Movie page
    if ($tagname eq "div")
    {
        # Synopsis body, expected only after its heading has been seen
        if ($cssClass eq "content" and $self->{awaitingSynopsis})
        {
            $self->{insideSynopsis}   = 1;
            $self->{awaitingSynopsis} = 0;
        }
    }
    elsif ($tagname eq "img")
    {
        # Poster
        $self->{curInfo}->{image} = $attr->{src}
          if $attr->{src} =~ /^http:\/\/img\.csfd\.cz\/posters\//;

        # Flag image in the list of titles: tells which title follows
        if ($self->{isTitles})
        {
            $self->{isOrigTitle} = 1 if ($attr->{alt} !~ /název$/);
            $self->{isSKTitle}   = 1 if ($attr->{alt} =~ /SK název$/);
        }
    }
    elsif ($tagname eq "ul")
    {
        # Original name and other names
        $self->{isTitles} = 1 if $cssClass eq "names";
    }
    elsif ($tagname eq "p")
    {
        # Genre
        $self->{insideGenre} = 1 if $cssClass eq "genre";

        # Info (country, date, time = duration)
        $self->{insideInfo} = 1 if $cssClass eq "origin";

        # Body of a user comment, expected after its author
        if ($self->{awaitingComment} and $cssClass eq "post")
        {
            $self->{awaitingComment} = 0;
            $self->{insideComment}   = 1;
        }
    }
    elsif ($tagname eq "h2")
    {
        # Rating
        $self->{insideRating} = 1 if $cssClass eq "average";
    }
    elsif ($tagname eq "h5")
    {
        # Comment author
        $self->{insideCommentAuthor} = 1 if $cssClass eq "author";
    }
    return;
}

# end
# Called each time a HTML tag ends.
# $tagname is the tag name.
sub end
{
    my ($self, $tagname) = @_;
    $self->{inside}->{$tagname}--;

    if ($self->{parsingList})
    {

        # movie details end with their enclosing div
        $self->{insideDetails} = 0
          if ($tagname eq "div")
          and $self->{insideDetails};

        # directors and actors: flush the collected names into the current
        # result entry and start over with an empty list
        if ($tagname eq "div")
        {
            if ($self->{insideDirectors})
            {
                $self->{insideDirectors} = 0;
                # "|| []" guards the case where no name was collected and the
                # slot is still undef (dereferencing undef dies under strict)
                $self->{itemsList}[ $self->{itemIdx} ]->{director} =
                  join(', ', @{ $self->{directors} || [] });
                $self->{directors}        = [];
                $self->{directorsCounter} = 0;
            }
            if ($self->{insideActors})
            {
                $self->{insideActors} = 0;
                $self->{itemsList}[ $self->{itemIdx} ]->{actors} =
                  join(', ', @{ $self->{actors} || [] });
                $self->{actors}        = [];
                $self->{actorsCounter} = 0;
            }
        }
    }
    else
    {

        # Synopsis
        $self->{insideSynopsis} = 0 if ($tagname eq "div");

        # Titles
        if ($tagname eq "ul" and $self->{isTitles})
        {
            $self->{isTitles} = 0;
        }

        # At the end of the page, fall back to the main title when no
        # original title was found
        if ($tagname eq "body")
        {
            $self->{curInfo}->{original} ||= $self->{curInfo}->{title};
        }

        # Actors
        if ($tagname eq "div" and $self->{insideActors})
        {
            $self->{curInfo}->{actors} = join(', ', @{ $self->{actors} || [] });
            $self->{insideActors} = 0;
        }

        # Directors
        if ($tagname eq "div" and $self->{insideDirectors})
        {
            $self->{curInfo}->{director} = join(', ', @{ $self->{directors} || [] });
            $self->{insideDirectors} = 0;
        }

        # Comment

        $self->{insideCommentAuthor} = 0
          if ($tagname eq "h5" and $self->{insideCommentAuthor});

        # NOTE(review): isComment is not set by any code visible in this
        # file section (text uses insideComment), so this branch looks
        # unreachable - confirm before relying on it.
        if ($tagname eq "li" and $self->{isComment})
        {
            $self->{curInfo}->{comment} .= "\n";
            $self->{isComment} = 0;
        }

        # Debug: dump the collected information once the page is complete.
        # The module is loaded only when debugging is actually enabled.
        if ($tagname eq "body" and $self->{debug})
        {
            require Data::Dumper;
            print Data::Dumper::Dumper($self->{curInfo});
        }
    }
}

# text
# Called each time some plain text (between tags) is processed.
# $origtext is the read text.
sub text
{
    my ($self, $origtext) = @_;

    # Chunks shorter than two characters are ignored (checked before trimming)
    return if length($origtext) < 2;
    $origtext =~ s/^\s+|\s+$//g;

    return if ($self->{parsingEnded});

    if ($self->{parsingList})
    {
        # A heading other than the search heading means the site sent us
        # straight to a movie page: report the loaded URL as the only result
        if ($self->{inside}{h1} && $origtext !~ m/Vyhledávání/i)
        {
            $self->{parsingEnded} = 1;
            $self->{itemIdx}      = 0;
            $self->{itemsList}[0]{url} = $self->{loadedUrl};
        }

        # Movie title
        if ($self->{isMovie})
        {
            $self->{itemsList}[ $self->{itemIdx} ]{"title"} = $origtext;
            $self->{isMovie} = 0;
            return;
        }

        # Date (year)
        if ($self->{isYear})
        {
            $self->{itemsList}[ $self->{itemIdx} ]{"date"} = $origtext;
            $self->{isYear} = 0;
            return;
        }

        # Extra movie info: genre, origin, date (read from the end of the list)
        if (   $self->{inside}{p}
            && $self->{insideDetails}
            && $self->{wasDetailsInfo} == 0)
        {
            my @fields = split(', ', $origtext);
            my $last   = $#fields;

            my $year    = ($fields[$last] =~ /^\d+$/) ? $fields[$last] : undef;
            my $country = ($last - 1 >= 0) ? $fields[ $last - 1 ] : undef;
            my $genre   = ($last - 2 >= 0) ? $fields[ $last - 2 ] : undef;

            if (defined $year)
            {
                $self->{itemsList}[ $self->{itemIdx} ]{date} = $year;
            }
            if (defined $country)
            {
                $self->{itemsList}[ $self->{itemIdx} ]{country} = $country;
            }
            if (defined $genre)
            {
                $self->{itemsList}[ $self->{itemIdx} ]{extra} = $genre;
            }
            $self->{wasDetailsInfo} = 1;
            return;
        }

        # Directors
        if ($self->{inside}{a} && $self->{insideDirectors})
        {
            push @{ $self->{directors} }, $origtext;
            $self->{directorsCounter}++;
            return;
        }

        # Actors
        if ($self->{inside}{a} && $self->{insideActors})
        {
            push @{ $self->{actors} }, $origtext;
            $self->{actorsCounter}++;
        }
        return;
    }

    # Movie page. The steps below are independent tests run in this exact
    # order: a flag raised by one step may be consumed by a later one
    # within the same call.

    # Movie titles
    if ($self->{inside}{h1})
    {
        $self->{curInfo}{title} ||= $origtext;
    }
    if ($self->{inside}{h3} && $self->{isTitles})
    {
        $self->{titlesCounter}++;
        $self->{titles}[ $self->{titlesCounter} ] = $origtext;
        if ($self->{isOrigTitle})
        {
            $self->{curInfo}{original} ||= $origtext;
            $self->{isOrigTitle} = 0;
        }
        if ($self->{isSKTitle} && $self->{lang} eq "SK")
        {
            $self->{curInfo}{title} = $origtext;
            $self->{isSKTitle} = 0;
        }
    }

    # Genre: " / " separated on the page, comma separated in the result
    if ($self->{insideGenre})
    {
        $origtext =~ s/ \/ /,/g;
        $self->{curInfo}{genre} = $origtext;
        $self->{insideGenre} = 0;
    }

    # Extra movie info: country, date (year), time
    if ($self->{insideInfo})
    {
        my ($origin, $released, $length) = split(', ', $origtext);
        $origin =~ s/ \/ /,/g;

        $self->{curInfo}{country} = $origin;
        $self->{curInfo}{date}    = $released;
        $self->{curInfo}{time}    = $length;

        $self->{insideInfo} = 0;
    }

    # Directors and Actors: the heading tells which list comes next
    if ($self->{inside}{h4})
    {
        $self->{insideDirectors} = 1 if ($origtext =~ /^Režie:/);
        $self->{insideActors}    = 1 if ($origtext =~ /^Hrají:/);
    }

    if ($self->{inside}{a} && $self->{insideDirectors})
    {
        push @{ $self->{directors} }, $origtext;
        $self->{directorsCounter}++;
    }
    if ($self->{inside}{a} && $self->{insideActors})
    {
        push @{ $self->{actors} }, $origtext;
        $self->{actorsCounter}++;
    }

    # Synopsis: the heading announces it, list items carry it
    if ($self->{inside}{h3})
    {
        $self->{awaitingSynopsis} = 1 if ($origtext eq "Obsah");
    }
    if ($self->{inside}{li} && $self->{insideSynopsis})
    {
        $self->{curInfo}{synopsis} .= $origtext . "\n\n\n";
    }

    # Rating: percentage converted to a 0-10 scale, rounded
    if ($self->{insideRating})
    {
        $origtext =~ s/([0-9]+)%/$1/;
        if ($origtext ne "")
        {
            $self->{curInfo}{ratingpress} = int($origtext / 10 + .5);
        }
        $self->{insideRating} = 0;
    }

    # Comments: author line first, then the comment body
    if ($self->{inside}{a} && $self->{insideCommentAuthor})
    {
        $self->{curInfo}{comment} .= $origtext . " napsal(a):\n";
        $self->{awaitingComment} = 1;
    }
    if ($self->{insideComment})
    {
        $self->{curInfo}{comment} .= $origtext . "\n\n";
        $self->{insideComment} = 0;
    }
}

# new
# Constructor.
# Returns object reference.
sub new
{
    my ($proto) = @_;

    # Accept either a class name or an existing object as invocant
    my $class = ref($proto) ? ref($proto) : $proto;

    # Let the base class build the object, then re-bless it into this class
    my $self = bless($class->SUPER::new(), $class);

    # Columns shown in the search results list
    my %shownFields = (
        title    => 1,
        date     => 1,
        director => 1,
        actors   => 1,
        country  => 1
    );
    $self->{hasField} = \%shownFields;

    $self->{lang} = "CS";

    $self->{curName} = undef;
    $self->{curUrl}  = undef;

    # Debug output is switched on through the environment
    $self->{debug} = ($ENV{GCS_DEBUG_PLUGIN_PHASE} > 0);

    return $self;
}

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCCulturalia.pm b/lib/gcstar/GCPlugins/GCfilms/GCCulturalia.pm
new file mode 100644
index 0000000..55c6692
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCCulturalia.pm
@@ -0,0 +1,241 @@
package GCPlugins::GCfilms::GCCulturalia;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA
#
###################################################

###################################
#                                 #
#    Plugin submitted by MeV      #
#                                 #
###################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    package GCPlugins::GCfilms::GCPluginCulturalia;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Called for every opening tag. In list mode it opens a new result
    # entry for each article link; in page mode it records the cover image
    # and flags the title / information blocks.
    sub start {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList}) {
            return unless $tagname eq "a";
            return unless $attr->{href} =~ /^\.\.\/art\/ver\.php\?art=/;

            # Link to an article page: one more search result
            $self->{isMovie} = 1;
            $self->{isInfo}  = 1;
            $self->{itemIdx}++;
            $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
            return;
        }

        if ($tagname eq "img") {
            # Cover: the relative path is turned into an absolute URL
            if ($attr->{src} =~ /\.\.\/(imatges\/articulos\/[0-9]*-1\.jpg)/) {
                $self->{curInfo}->{image} = "http://www.culturalianet.com/" . $1;
            }
        }
        elsif ($tagname eq "font") {
            my $fontClass = $attr->{class};
            $self->{insideName}  = 1 if $fontClass eq "titulo2";
            $self->{insideInfos} = 1 if $fontClass eq "titulo3";
        }
        return;
    }

    # Called for every closing tag; only the nesting counter is maintained.
    sub end {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # Called for every piece of text between tags.
    sub text {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        if ($self->{parsingList}) {
            return unless $self->{isMovie};

            # The "De <director> (<year>)" line closes an entry; any other
            # text before it is the title (first one wins).
            if (my ($director, $year) = $origtext =~ /De ([^\(]*) \(([0-9]{4})\)/) {
                $self->{itemsList}[$self->{itemIdx}]->{"director"} = $director;
                $self->{itemsList}[$self->{itemIdx}]->{"date"}     = $year;
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
            }
            else {
                $origtext =~ s/\.$//;
                $self->{itemsList}[$self->{itemIdx}]->{"title"} ||= $origtext;
            }
            return;
        }

        $origtext =~ s/\s{2,}//g;
        $origtext =~ s/\n//g unless $self->{insideSynopsis};

        if ($self->{insideName}) {
            # "<title>. (<year>)"
            if (my ($title, $year) = $origtext =~ /([^\(]*)\. \(([0-9]{4})\)/) {
                $self->{curInfo}->{title} = $title;
                $self->{curInfo}->{date}  = $year;
            }
            $self->{insideName} = 0;
        }
        elsif ($self->{insideInfos}) {
            $origtext =~ s/ , //;
            $origtext =~ s/(, )*$//;

            # "<label>:<value>" lines; the first matching label wins
            my $value;
            if (($value) = $origtext =~ /Género\:(.*)/) {
                ($self->{curInfo}->{genre} = $value) =~ s/ \/ /,/g;
            }
            elsif (($value) = $origtext =~ /Nacionalidad\:(.*)/) {
                $self->{curInfo}->{country} = $value;
            }
            elsif (($value) = $origtext =~ /Director\:(.*)/) {
                $self->{curInfo}->{director} = $value;
            }
            elsif (($value) = $origtext =~ /Actores\:(.*)/) {
                $self->{curInfo}->{actors} = $value;
            }
            elsif (($value) = $origtext =~ /Sinopsis\:(.*)/) {
                ($self->{curInfo}->{synopsis} = $value) =~ s/, //;
            }
            elsif (($value) = $origtext =~ /Duración\:(.*)/) {
                ($self->{curInfo}->{time} = $value) =~ s/\.$//;
            }
            $self->{insideInfos} = 0;
        }
        elsif (my ($synopsis) = $origtext =~ /^Sinopsis\:(.*)/) {
            ($self->{curInfo}->{synopsis} = $synopsis) =~ s/, //;
            $self->{curInfo}->{synopsis} =~ s/(, )*$//;
        }

        # Text in italics is taken as the original title
        if ($self->{inside}->{i}) {
            $self->{curInfo}->{original} = $origtext;
        }
    }

    # Constructor. Returns object reference.
    sub new {
        my ($proto) = @_;
        my $class = ref($proto) ? ref($proto) : $proto;
        my $self  = bless($class->SUPER::new(), $class);

        # Columns shown in the search results list
        my %shownFields = (
            title    => 1,
            date     => 1,
            director => 1,
            actors   => 0,
        );
        $self->{hasField} = \%shownFields;

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # Simplifies the markup before parsing. Returns the modified HTML.
    sub preProcess {
        my ($self, $html) = @_;

        # drop bold tags
        $html =~ s{</?b>}{}g;
        # line breaks become list separators
        $html =~ s/<br>/, /g;
        # replace person links with their text
        $html =~ s{<a href=\.\./art/ver_e\.php\?nombre=[0-9]*>([^<]*)</a>}
                  {$1}g;
        # pull the value that follows a label inside the label's font tag
        $html =~ s{<font class.=..titulo3.>([^<]*)</font>([^<]*)}
                  {<font class ='titulo3'>$1 $2</font>}g;

        return $html;
    }

    # Returns the URL of the search page for $word.
    sub getSearchUrl {
        my ($self, $word) = @_;

        return "http://www.culturalianet.com/bus/resu.php?texto=$word&donde=1";
    }

    # Turns a relative result link into an absolute URL.
    sub getItemUrl {
        my ($self, $url) = @_;

        return "http://www.culturalianet.com/bus/" . $url;
    }

    sub getName {
        return "CulturaliaNet";
    }

    sub getAuthor {
        return 'MeV';
    }

    sub getLang {
        return 'ES';
    }

    sub getCharset {
        my ($self) = @_;

        return "Windows-1252";
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCDVDEmpire.pm b/lib/gcstar/GCPlugins/GCfilms/GCDVDEmpire.pm
new file mode 100644
index 0000000..a32a7a8
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCDVDEmpire.pm
@@ -0,0 +1,427 @@
package GCPlugins::GCfilms::GCDVDEmpire;

###################################################
#
#  Copyright 2009 by FiXx
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginDVDEmpire;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for every opening tag (including the pseudo tags injected by
    # preProcess: endsearch, rating, actors, directors, genres, audio).
    sub start {
        my ( $self, $tagname, $attr, $attrseq, $origtext ) = @_;

        $self->{inside}->{$tagname}++;

        if ( $self->{parsingList} ) {
            if ( $self->{outOfMovieList} )
            {
                # Past the end marker of the results: nothing left to read
                return;
            }
            elsif (( $self->{inMovieList} )
                && ( !$self->{inMovie} )
                && ( $tagname eq 'a' )
                && ( $attr->{href} =~ /^(\/Exec\/v4_item\.asp\?item_id=[0-9]*)$/ ) )
            {
                # First item link of a result row: open a new entry
                my $url = $1;
                $self->{isMovie} = 1;
                $self->{inMovie} = 1;
                $self->{itemIdx}++;
                $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
            }
            elsif (( $self->{inMovie} )
                && ( $tagname eq 'img' )
                && ( $attr->{src} =~ /(.*gen\/movies\/[0-9]*t\.jpg)/ ) )
            {
                # Thumbnail "...t.jpg" -> larger picture "...h.jpg"
                (my $image = $attr->{src}) =~ s/t\.jpg$/h.jpg/;
                $self->{itemsList}[ $self->{itemIdx} ]->{image} = $image;
            }
            elsif (( $self->{inMovie} )
                && ( $tagname eq 'a' )
                && ( $attr->{href} =~ /cast_id/ ) )
            {
                $self->{isActors} = 1;
            }
            elsif (( $self->{inMovie} )
                && ( $tagname eq 'td' )
                && ( $attr->{bgcolor} eq '#D7DDE7' ) )
            {
                # Separator cell: the current result row is finished
                $self->{inMovie} = 0;
            }
            elsif (( $tagname eq 'div' )
                && ( $attr->{id} eq 'Search_Container' ) )
            {
                $self->{inMovieList} = 1;
            }
            elsif ( ( $self->{inMovieList} )
                && ( $tagname eq 'endsearch' ))
            {
                $self->{inMovieList}    = 0;
                $self->{outOfMovieList} = 1;
            }
        }
        else {
            if ( $self->{parsingEnded} )
            {
                # Once the content block has been read, copy the values that
                # were already collected for this item on the results page
                if (!$self->{infoSet})
                {
                    $self->{curInfo}->{image} = $self->{itemsList}[$self->{wantedIdx}]->{image};
                    $self->{curInfo}->{date}  = $self->{itemsList}[$self->{wantedIdx}]->{date};    #"short text"
                    $self->{curInfo}->{time}  = $self->{itemsList}[$self->{wantedIdx}]->{time};    #"short text"
                    $self->{curInfo}->{age}   = $self->{itemsList}[$self->{wantedIdx}]->{age};     #"options"
                    # Back cover: same file name with "b" instead of "h"
                    ($self->{curInfo}->{backpic} = $self->{curInfo}->{image}) =~ s/h\.jpg$/b.jpg/; #"image"

                    $self->{infoSet} = 1;
                }
                return;
            }
            elsif ( ($tagname eq 'div')
                 && ($attr->{id} eq 'Search_Container') )
            {
                $self->{isContent} = 1;
            }
            elsif ( ( $self->{isContent} )
                 && ( $tagname eq 'div' ) )
            {
                $self->{inNonContentDiv} = 1;
            }
            elsif ( $self->{isContent})
            {
                if ( ($tagname eq 'td')
                  && ($attr->{class} eq 'fontxlarge') )
                {
                    $self->{isTitle} = 1 ;
                }
                elsif ($tagname eq 'rating')
                {
                    $self->{isRating} = 1 ;
                }
                elsif ($tagname eq 'actors')
                {
                    $self->{isActors} = 1 ;
                }
                elsif ( ($self->{isActors})
                     && ($tagname eq 'a')
                     && ($attr->{href} =~ /v4_list_cast\.asp/) )
                {
                    $self->{isActor} = 1 ;
                }
                elsif ($tagname eq 'directors')
                {
                    $self->{isDirectors} = 1 ;
                }
                elsif ( ($self->{isDirectors})
                     && ($tagname eq 'a')
                     && ($attr->{href} =~ /v4_list_cast\.asp/) )
                {
                    $self->{isDirector} = 1 ;
                }
                elsif ($tagname eq 'genres')
                {
                    $self->{isGenres} = 1 ;
                }
                elsif ( ($self->{isGenres})
                     && ($tagname eq 'a')
                     && ($attr->{href} =~ /v2_category\.asp/) )
                {
                    $self->{isGenre} = 1 ;
                }
                elsif ($tagname eq 'audio')
                {
                    $self->{inAudio} = 1 ;
                }
                elsif ( ($self->{inAudio})
                     && ($tagname eq 'td') )
                {
                    $self->{isAudio} = 1 ;
                }
                elsif ( ($self->{isTitle})
                     && ($tagname eq 'strong') )
                {
                    # 2 = the title text itself comes next
                    $self->{isTitle} = 2 ;
                }
                elsif ( ($self->{startSynopsis})
                     && ($tagname eq 'td') )
                {
                    $self->{isSynopsis} = 1 ;
                }
                elsif ( ($self->{isSynopsis})
                     && ($tagname eq 'br') )
                {
                    $self->{synopsisLineBreak} = 1 ;
                }
            }
        }
    }

    # end
    # Called for every closing tag. On an item page it detects the end of
    # the content block and of the sections opened in start.
    sub end {
        my ( $self, $tagname ) = @_;

        $self->{inside}->{$tagname}--;

        if ( !$self->{parsingList} )
        {
            if ( $self->{parsingEnded} )
            {
                return;
            }
            if ($self->{isContent})
            {
                if ( ( $tagname eq 'div' )
                  && ( !$self->{inNonContentDiv} ) )
                {
                    # Closing the content container itself: page is done
                    $self->{isContent}    = 0;
                    $self->{parsingEnded} = 1;
                }
                elsif ( ( $tagname eq 'div' )
                     && ( $self->{inNonContentDiv} ) )
                {
                    $self->{inNonContentDiv} = 0;
                }
                elsif ( ( $tagname eq 'table' )
                     && ( $self->{isSynopsis} ) )
                {
                    $self->{startSynopsis} = 0;
                    $self->{SynopsisEnded} = 1;
                    $self->{isSynopsis}    = 0;
                }
                elsif ( ($self->{isActors} ) && ($tagname eq 'actors') )
                {
                    $self->{isActors} = 0 ;
                }
                elsif ( ($self->{isGenres} ) && ($tagname eq 'genres') )
                {
                    $self->{isGenres} = 0 ;
                }
                elsif ( ($self->{isDirectors} ) && ($tagname eq 'directors') )
                {
                    $self->{isDirectors} = 0 ;
                }
                elsif ( ($self->{isAudio})
                     && ($tagname eq 'td') )
                {
                    $self->{isAudio} = 0 ;
                    $self->{inAudio} = 0 ;
                }
            }
        }

    }

    # text
    # Called for every piece of text between tags.
    sub text {
        my ( $self, $origtext ) = @_;

        if ( $self->{parsingList} )
        {
            if ( ( $self->{inMovieList} ) && ( $self->{isMovie} ) )
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                $self->{isMovie} = 0;
            }
            elsif ( ( $self->{inMovie} )
                 && ( $origtext =~ /([^~]*)~~~([0-9]*)mins.~~~Release Date:[^~]*~~~Prod Year: ([0-9]{4})/ ) )
            {
                # "<rating>~~~<n>mins.~~~Release Date:...~~~Prod Year: <yyyy>"
                # (the ~~~ separators are produced by preProcess)
                my ( $rating, $minutes, $year ) = ( $1, $2, $3 );

                # MPAA-style rating -> GCstar age value
                my %ageOf = (
                    'Unrated'  => 1,
                    'Open'     => 1,
                    'G'        => 2,
                    'Approved' => 2,
                    'PG'       => 5,
                    'M'        => 5,
                    'GP'       => 5,
                    'PG-13'    => 13,
                    'R'        => 17,
                    'NC-17'    => 18,
                    'X'        => 18,
                );
                $self->{itemsList}[ $self->{itemIdx} ]->{age} = $ageOf{$rating}
                    if exists $ageOf{$rating};

                $self->{itemsList}[ $self->{itemIdx} ]->{time} = $minutes . ' min';

                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $year;
            }
            elsif ( ( $self->{inMovie} ) && ( $self->{isActors} ) )
            {
                # Separator goes between names only, so the list does not
                # end with a dangling ", "
                my $item = $self->{itemsList}[ $self->{itemIdx} ] ||= {};
                $item->{actors} = ( defined $item->{actors} && $item->{actors} ne '' )
                    ? $item->{actors} . ', ' . $origtext
                    : $origtext;
                $self->{isActors} = 0;
            }
        }
        else {
            $origtext =~ s/^\s*//;

            # Only empty text is skipped; a literal "0" (e.g. a rating of 0)
            # must still reach the handlers below so its flag gets cleared
            return if $origtext eq '';
            if ( $self->{parsingEnded} )
            {
                return;
            }
            if ($self->{isContent})
            {
                if ( $self->{isTitle} == 2)
                {
                    $self->{curInfo}->{title}    = $origtext;                  #"short text"
                    $self->{curInfo}->{original} = $origtext;                  #"short text"
                    $self->{isTitle} = 0 ;
                }
                elsif ( $self->{isRating})
                {
                    # Site rating is out of 5; stored value is twice that
                    $self->{curInfo}->{ratingpress} = int($origtext * 2);      #"number"
                    $self->{isRating} = 0 ;
                }
                elsif ( ( !$self->{SynopsisEnded} )
                     && ( $origtext eq 'Synopsis' ) )
                {
                    $self->{startSynopsis} = 1 ;
                }
                elsif ( $self->{isSynopsis} )
                {
                    $self->{curInfo}->{synopsis} .= "\n\n" if $self->{synopsisLineBreak};
                    $self->{curInfo}->{synopsis} .= $origtext ;                #"long text"
                    $self->{curInfo}->{synopsis} .= " " if $self->{synopsisLineBreak};
                    $self->{synopsisLineBreak} = 0 ;
                }
                elsif ( $self->{isActor} )
                {
                    push @{$self->{curInfo}->{actors}}, [$origtext]
                      if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
                    $self->{actorsCounter}++;
                    $self->{isActor} = 0 ;
                }
                elsif ( $self->{isGenre} )
                {
                    push @{$self->{curInfo}->{genre}}, [$origtext];
                    $self->{isGenre} = 0 ;
                }
                elsif ( $self->{isDirector} )
                {
                    $self->{curInfo}->{director} .= $origtext;                 #"long text"
                    $self->{isDirector}  = 0 ;
                    $self->{isDirectors} = 0 ;
                }
                elsif ( $self->{isAudio} )
                {
                    # "<language>:<encoding>"
                    (my $language = $origtext) =~ s/([^:]*):(.*)/$1/ ;
                    my $audio = $2 ;
                    $language =~ s/\s// ;
                    $audio =~ s/\r// ;
                    push @{$self->{curInfo}->{audio}}, [$language, $audio];
                }
            }
        }
    }

    # new
    # Constructor. Returns object reference.
    sub new {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless( $self, $class );

        # Columns shown in the search results list
        $self->{hasField} = {
            title  => 1,
            date   => 1,
            actors => 1,
            age    => 1,
            time   => 1,
            image  => 1,
        };

        $self->{isInfo}    = 0;
        $self->{isMovie}   = 0;
        $self->{inMovie}   = 0;
        $self->{isContent} = 0;
        $self->{curName}   = undef;
        $self->{curUrl}    = undef;

        return $self;
    }

    # preProcess
    # Resets the per-page parser state and injects pseudo tags into the
    # HTML so that start/end/text can find the interesting parts.
    # Returns the modified HTML.
    sub preProcess {
        my ( $self, $html ) = @_;

        $self->{parsingEnded} = 0;

        if ($self->{parsingList})
        {
            # Start every results page from a clean state. Without this a
            # flag such as outOfMovieList, once set, would make start()
            # ignore every later results page parsed by the same object.
            # NOTE(review): assumes the plugin object is reused between
            # searches; the reset is harmless if it is not.
            $self->{$_} = 0
                for qw(inMovieList outOfMovieList inMovie isMovie isActors);

            # Join the "rating ~ length ~ dates" cells into one text chunk
            $html =~ s/<\/nobr>[ ]*~[ ]*<nobr>/~~~/g ;
            # Marker for the end of the result list
            $html =~ s/<b>Phone #:<\/b>/<endsearch>here<\/endsearch>/g ;

        }
        else
        {
            # Same for an item page: flags and the actor counter must not
            # carry over from the previous item.
            $self->{$_} = 0
                for qw(infoSet isContent inNonContentDiv isTitle isRating
                       isActors isActor isDirectors isDirector isGenres isGenre
                       inAudio isAudio startSynopsis isSynopsis SynopsisEnded
                       synopsisLineBreak actorsCounter);

            $html =~ s/<b>([0-9\.]*)<\/b> out of <b>5<\/b>/<rating>$1<\/rating>/g ;
            $html =~ s/<b>Actors:<\/b>/<actors>/g ;
            $html =~ s/<b>Writers:<\/b>/<\/actors>/g ;
            $html =~ s/<b>Directors:<\/b>(.*cast_id[^\/]*<\/a>)/<directors>$1<\/directors>/g ;
            $html =~ s/<b>Genre<\/b>(.*cat_id[^\/]*<\/a>)/<genres>$1<\/genres>/g ;
            $html =~ s/<b>Audio:<\/b>/<audio><\/audio>/g ;
            $html =~ s/<font face='[^']*' size='[^']*' color='#FFFFFF'>i<\/font>/ /g ;
        }

        return $html;
    }

    # Returns the URL of the search page for $word.
    sub getSearchUrl {
        my ( $self, $word ) = @_;

        # search_refined value: 32 by default, 64 when strict matching is
        # switched on (it is hard-wired off here)
        my $searchvalue    = 32 ;
        my $strictmatching = 0;
        if ($strictmatching)
        {
            $searchvalue = 64 ;
        }
        return "http://www.dvdempire.com/Exec/v1_search_all.asp?string=$word&pp=5&search_refined=$searchvalue";
    }

    # Turns a result link into an absolute URL.
    sub getItemUrl {
        my ( $self, $url ) = @_;

        return 'http://www.dvdempire.com/' . $url;
    }

    sub changeUrl {
        my ( $self, $url ) = @_;

        return $url;
    }

    sub getName {
        return "DVDEmpire (EN)";
    }

    sub getCharset {
        my $self = shift;

        return "ISO-8859-1";
    }

    sub getAuthor {
        return 'FiXx';
    }

    sub getLang {
        return 'EN';
    }
}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCDVDFr.pm b/lib/gcstar/GCPlugins/GCfilms/GCDVDFr.pm
new file mode 100644
index 0000000..d75c2f6
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCDVDFr.pm
@@ -0,0 +1,374 @@
package GCPlugins::GCfilms::GCDVDFr;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA
#
###################################################

###################################
#                                 #
#    Plugin submitted by MeV      #
#                                 #
###################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    package GCPlugins::GCfilms::GCPluginDVDFr;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for every opening element of the XML returned by the site.
    # It only raises the flag telling text() what the next text chunk is
    # (and, for <rating>, converts the id into an age value).
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq 'dvd')
            {
                # One <dvd> element per search result
                my $url = $attr->{href};
                $self->{isMovie} = 1;
                $self->{isInfo}  = 1;
                $self->{itemIdx}++;
                $self->{itemsList}[$self->{itemIdx}]->{url} = $url;
            }
            elsif ($tagname eq 'id')
            {
                $self->{isID} = 1;
            }
            elsif ($tagname eq 'fr')
            {
                $self->{isTitleFR} = 1;
            }
            elsif (($tagname eq 'star') && ($attr->{type} =~ /R.alisateur/))
            {
                $self->{isDirector} = 1;
            }
            elsif ($tagname eq "media")
            {
                $self->{isMedia} = 1;
            }
            elsif ($tagname eq "edition")
            {
                $self->{isEdition} = 1;
            }
        }
        else
        {
            if (($tagname eq "cover") || ($tagname eq "jaquette"))
            {
                $self->{insideImage} = 1;
            }
            elsif ($tagname eq "url")
            {
                $self->{insideURL} = 1;
            }
            elsif (($tagname eq "fr") || ($tagname eq "titres_fr"))
            {
                $self->{insideTitleFR} = 1;
            }
            elsif (($tagname eq "vo") || ($tagname eq "titres_vo"))
            {
                $self->{insideTitleVO} = 1;
            }
            elsif ($tagname eq "pays")
            {
                $self->{insideNat} = 1;
            }
            elsif ($tagname eq "annee")
            {
                $self->{insideYear} = 1;
            }
            elsif ($tagname eq "synopsis")
            {
                $self->{insideSynopsis} = 1;
            }
            elsif ($tagname eq "duree")
            {
                $self->{insideTime} = 1;
            }
            elsif ($tagname eq "realisateur")
            {
                $self->{insideDirector} = 1;
            }
            elsif ($tagname eq "star")
            {
                # A <star> without a type is treated as an actor
                $self->{insideDirector} = 1 if $attr->{type} eq "Réalisateur";
                $self->{insideActors} = 1
                    if (! $attr->{type}) || ($attr->{type} eq "Acteur");
            }
            elsif ($tagname eq "categorie")
            {
                $self->{insideGenre} = 1;
            }
            elsif ($tagname eq "rating")
            {
                # Site rating id -> GCstar age value
                $self->{curInfo}->{age} = 2  if $attr->{id} == 1;
                $self->{curInfo}->{age} = 5  if $attr->{id} == 2;
                $self->{curInfo}->{age} = 12 if $attr->{id} == 3;
                $self->{curInfo}->{age} = 13 if $attr->{id} == 4;
                $self->{curInfo}->{age} = 16 if $attr->{id} == 5;
                $self->{curInfo}->{age} = 18 if $attr->{id} > 5;
            }
        }
    }

    # end
    # Called for every closing element; only the nesting counter is kept.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # text
    # Called for every piece of text between elements. Stores the text in
    # the field announced by the flag that start() raised.
    sub text
    {
        my ($self, $origtext) = @_;

        # Very short chunks are noise, except for an item id
        return if (length($origtext) < 2) && (! $self->{isID});

        if ($self->{parsingList})
        {
            if ($self->{isID})
            {
                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.dvdfr.com/api/dvd.php?id=$origtext";
                $self->{isID} = 0;
            }
            elsif ($self->{isDirector})
            {
                $self->{itemsList}[$self->{itemIdx}]->{"director"} .= $self->{itemsList}[$self->{itemIdx}]->{"director"} ? ", " . $origtext : $origtext
                    if ($self->{directorCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_DIRECTORS);
                $self->{directorCounter}++;
                $self->{isDirector} = 0;
            }
            elsif ($self->{isMovie})
            {
                $self->{itemsList}[$self->{itemIdx}]->{"title"} = $origtext;
                $self->{isMovie} = 0;
                # A new result starts: its directors are counted from zero
                $self->{directorCounter} = 0;
                $self->{isInfo} = 1;
                return;
            }
            elsif ($self->{isTitleFR})
            {
                $self->{itemsList}[$self->{itemIdx}]->{"title"} = $origtext;
                $self->{isTitleFR} = 0;
            }
            elsif ($self->{isMedia})
            {
                $origtext = '' if $origtext !~ /\w/;
                $self->{itemsList}[$self->{itemIdx}]->{"format"} = $origtext;
                $self->{isMedia} = 0;
            }
            elsif ($self->{isEdition})
            {
                $origtext = '' if $origtext !~ /\w/;
                $self->{itemsList}[$self->{itemIdx}]->{"extra"} = $origtext;
                $self->{isEdition} = 0;
            }
        }
        else
        {
            $origtext =~ s/\s{2,}//g;

            if ($self->{insideImage})
            {
                # A link to the cover script is replaced by the direct
                # picture URL (pictures are grouped by id / 1000)
                if ($origtext =~ m|/microapp/jaquette\.php\?id=([0-9]*)|)
                {
                    my $dir = int($1 / 1000);
                    $self->{curInfo}->{image} = "http://dvdfr.com/images/dvd/cover_200x280/$dir/$1.jpg";
                }
                else
                {
                    $self->{curInfo}->{image} = $origtext;
                }
                $self->{insideImage} = 0;
            }
            elsif ($self->{insideURL})
            {
                $self->{curInfo}->{$self->{urlField}} = $origtext;
                $self->{insideURL} = 0;
            }
            elsif ($self->{insideTitleFR})
            {
                $self->{curInfo}->{title} = $origtext;
                $self->{insideTitleFR} = 0;
            }
            elsif ($self->{insideTitleVO})
            {
                $self->{curInfo}->{original} = $origtext;
                $self->{insideTitleVO} = 0;
            }
            elsif ($self->{insideNat})
            {
                $self->{curInfo}->{country} .= $self->{curInfo}->{country} ? ", " . $origtext : $origtext;
                $self->{insideNat} = 0;
            }
            elsif ($self->{insideYear})
            {
                $self->{curInfo}->{date} = $origtext;
                $self->{insideYear} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                $self->{curInfo}->{synopsis} = $origtext;
                $self->{curInfo}->{synopsis} =~ s/\n/ /g;
                $self->{insideSynopsis} = 0;
            }
            elsif ($self->{insideTime})
            {
                $self->{curInfo}->{time} = $origtext;
                $self->{insideTime} = 0;
            }
            elsif ($self->{insideDirector})
            {
                $self->{curInfo}->{director} .= $self->{curInfo}->{director} ? ", " . $origtext : $origtext
                    if ($self->{directorCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_DIRECTORS);
                $self->{directorCounter}++;
                $self->{insideDirector} = 0;
            }
            elsif ($self->{insideActors})
            {
                $self->{curInfo}->{actors} .= $self->{curInfo}->{actors} ? ", " . $origtext : $origtext
                    if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
                $self->{actorsCounter}++;
                $self->{insideActors} = 0;
            }
            elsif ($self->{insideGenre})
            {
                $self->{curInfo}->{genre} .= $self->{curInfo}->{genre} ? "," . $origtext : $origtext;
                $self->{insideGenre} = 0;
            }
            elsif (($self->{inside}->{track}) && ($self->{inside}->{langue}))
            {
                # Add the language only once. \Q...\E makes the name match
                # literally: names may contain regex metacharacters such as
                # parentheses.
                if ($self->{curInfo}->{audio} !~ /(^|,)\Q$origtext\E(,|$)/)
                {
                    $self->{curInfo}->{audio} .= ',' if $self->{curInfo}->{audio};
                    $self->{curInfo}->{audio} .= $origtext;
                }
            }
            elsif (($self->{inside}->{soustitrage}) && ($self->{inside}->{soustitre}))
            {
                # Same de-duplication for subtitles
                if ($self->{curInfo}->{subt} !~ /(^|,)\Q$origtext\E(,|$)/)
                {
                    $self->{curInfo}->{subt} .= ',' if $self->{curInfo}->{subt};
                    $self->{curInfo}->{subt} .= $origtext;
                }
            }
        }
    }

    # new
    # Constructor. Returns object reference.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless ($self, $class);

        # Columns shown in the search results list
        $self->{hasField} = {
            title    => 1,
            date     => 0,
            director => 1,
            actors   => 0,
            format   => 1,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Resets the per-page counters; the document itself is left untouched.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{directorCounter} = 0;
        $self->{actorsCounter}   = 0;

        return $html;
    }

    # Returns the search URL for $word. A word made of at least eight
    # digits/X characters is searched as an EAN code.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        $word = 'ean:'.$word
            if $word =~ /^[\dX]{8}[\dX]*$/;

        return "http://www.dvdfr.com/api/search.php?title=$word";
    }

    # Result URLs are already absolute; fall back to the site root when
    # none was found.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return $url unless $url eq '';
        return "http://www.dvdfr.com/";
    }

    # Maps the URL of a web page of the site to the matching API URL.
    sub changeUrl
    {
        my ($self, $url) = @_;

        $url =~ s/\/dvd\//\/api\//;

        return $url;
    }

    sub getName
    {
        return "DVDFr.com";
    }

    sub getAuthor
    {
        return 'MeV';
    }

    sub getLang
    {
        return 'FR';
    }

    # Label of the "extra" column in the results list
    sub getExtra
    {
        return 'Edition';
    }

    sub getEanField
    {
        return 'title';
    }

    sub isPreferred
    {
        return 1;
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCDVDPost.pm b/lib/gcstar/GCPlugins/GCfilms/GCDVDPost.pm
new file mode 100644
index 0000000..4bb6456
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCDVDPost.pm
@@ -0,0 +1,269 @@
package GCPlugins::GCfilms::GCDVDPost;

+################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +################################### +# # +# Plugin soumis par MeV # +# # +################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginDVDPost; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + if (($attr->{href} =~ /^product_info\.php\?products_id=/)) + { + my $url = $attr->{href}; + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + } + } + } + else + { + if ($tagname eq "img") + { + if ($attr->{src} =~ /http:\/\/images\.dvdpost\.be\/\/dvd/) + { + $self->{curInfo}->{image} = $attr->{src}; + } + elsif ($self->{insideAge}) + { + (my $fileName = $attr->{src}) =~ s|.+/([^/]+)$|$1|; + $self->{curInfo}->{age} = 2 if $fileName eq 'all.gif'; + $self->{curInfo}->{age} = 12 if $fileName eq '-12.gif'; + $self->{curInfo}->{age} = 16 if $fileName eq '-16.gif'; + 
$self->{insideAge} = 0; + } + } + elsif ($tagname eq "table") + { + if ( ($attr->{cellpadding} eq "0") + && ($attr->{cellspacing} eq "0") + && ($attr->{width} eq "100%") + && ($attr->{border} ne "0")) + { + $self->{insideSynopsisFather} = 1; + } + } + elsif ($tagname eq "td") + { + if ($attr->{style} eq "text-align:right;font-size:9px;color:gray") + { + $self->{insideGenre} = 1; + } + elsif (($attr->{class} eq "boxText") && $attr->{align} eq "left") + { + if ($self->{insideSynopsisFather} == 1) + { + $self->{insideSynopsis} = 1; + $self->{insideSynopsisFather} = 0; + } + else + { + $self->{insideSynopsis} = 0; + } + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + } + + sub text + { + my ($self, $origtext) = @_; + + return if length($origtext) < 2; + + if ($self->{parsingList}) + { + if ($self->{isMovie}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext; + $self->{isMovie} = 0; + $self->{isInfo} = 1; + return; + } + } + else + { + $origtext =~ s/\n*//g if !$self->{insideSynopsis}; + $origtext =~ s/\s{2,}//g; + + if ($self->{insideDate}) + { + $self->{curInfo}->{date} = $origtext; + $self->{insideDate} = 0; + } + elsif ($self->{insideDirector}) + { + $origtext =~ s/ ,/, /g; + $origtext =~ s/^(.*), /$1/; + $self->{curInfo}->{director} = $origtext if !$self->{curInfo}->{director}; + $self->{insideDirector} = 0; + } + elsif ($self->{insideSynopsis}) + { + $self->{curInfo}->{synopsis} = $origtext; + $self->{insideSynopsis} = 0; + } + elsif ($self->{insideNat}) + { + $self->{curInfo}->{country} = $origtext; + $self->{insideNat} = 0; + } + elsif ($self->{insideTime}) + { + $self->{curInfo}->{time} = $origtext . 
" min"; + $self->{insideTime} = 0; + } + elsif ($self->{insideActors}) + { + $origtext =~ s/ ,/, /g; + $origtext =~ s/^(.*), /$1/; + $self->{curInfo}->{actors} = $origtext if !$self->{curInfo}->{actors}; + $self->{insideActors} = 0; + } + elsif ($self->{insideOrig}) + { + $self->{curInfo}->{original} = $origtext if !$self->{curInfo}->{original}; + $self->{insideOrig} = 0; + } + elsif ($self->{inside}->{b}) + { + $self->{insideDirector} = 1 if $origtext =~ m/R.alisateur/; + $self->{insideTime} = 1 if $origtext =~ m/Dur.e/; + $self->{insideActors} = 1 if $origtext =~ m/Acteurs/; + $self->{insideAge} = 1 if $origtext =~ m/Public/; + } + elsif ($self->{inside}->{table}) + { + if ($origtext =~ /(.*) \( ([0-9]{4}) \)/) + { + $self->{curInfo}->{title} = $1 if !$self->{curInfo}->{title}; + $self->{curInfo}->{date} = $2 if !$self->{curInfo}->{date}; + } + elsif ($self->{insideGenre}) + { + $origtext =~ s/\|/,/g; + $self->{curInfo}->{genre} = $origtext; + $self->{insideGenre} = 0; + } + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 0, + director => 0, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $html =~ s/ / /g; + $html =~ s/<u>|<\/u>//g; + $html =~ s/<a href="directors\.php\?directors\_id=[0-9]*">([^<]*)<\/a>/$1/gi; + $html =~ s/<a href="actors\.php\?actors\_id=[0-9]*">([^<]*)<\/a>/$1/gi; + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.dvdpost.be/advanced_search_result2.php?language=fr&keywords=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://www.dvdpost.be/" . $url . 
"&language=fr" unless $url eq ''; + return "http://www.dvdpost.be/"; + } + + sub getName + { + return "DVDPost.be"; + } + + sub getAuthor + { + return 'MeV'; + } + + sub getLang + { + return 'FR'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCDicshop.pm b/lib/gcstar/GCPlugins/GCfilms/GCDicshop.pm new file mode 100644 index 0000000..d50ea4d --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCDicshop.pm @@ -0,0 +1,343 @@ +package GCPlugins::GCfilms::GCDicschop; + +################################################### +# +# Copyright 2005-2010 Tian, Michael Mayer +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Film plugin for discshop.se.
    #
    # The start/end/text callbacks are driven by an HTML parser owned by the
    # base class (not visible here). They run in two modes, selected by
    # $self->{parsingList}:
    #   - list mode   : fills $self->{itemsList} from a search result page;
    #   - detail mode : fills $self->{curInfo} from a single film page.
    package GCPlugins::GCfilms::GCPluginDicshop;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Opening-tag callback.
    #   $tagname - lower-case tag name as compared below ('div', 'a', ...)
    #   $attr    - hash ref of the tag's attributes
    # $attrseq and $origtext are accepted for interface compatibility only.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        # Both modes stop reacting once the end-of-content marker was seen.
        return if $self->{parsingEnded};

        if ($self->{parsingList})
        {
            if ($tagname eq 'div')
            {
                if ($attr->{class} eq 'ds_l_h')
                {
                    $self->{isMovie} = 1;
                }
                elsif ($attr->{class} eq 'ds_l_b')
                {
                    $self->{isMovie} = 0;
                }
                elsif ($attr->{class} eq 'settingSavePlusContainer')
                {
                    $self->{parsingEnded} = 1;
                }
            }
            elsif ($self->{isMovie} && ($tagname eq 'a'))
            {
                # Every link inside a result header starts a new list entry.
                $self->{itemIdx}++;
                $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href};
            }
        }
        else
        {
            if ($tagname eq 'div')
            {
                if ($attr->{class} eq "header_section hs_spec")
                {
                    $self->{isInfo} = 1;
                }
                elsif ($attr->{class} eq "header_section hs_omdomme")
                {
                    $self->{isSynopsis} = 0;
                }
                elsif ($attr->{class} =~ m/right_cont_section/)
                {
                    $self->{parsingEnded} = 1;
                }
                elsif (($attr->{class} =~ m/^item([12])$/) && $self->{isInfo})
                {
                    # item1 holds the label, item2 the value (see text()).
                    $self->{isItem} = $1;
                }
                elsif ($attr->{class} =~ m/ds_produkt_left/)
                {
                    $self->{isCover} = 1;
                }
                elsif ($attr->{class} =~ m/ds_omdomme_top/)
                {
                    $self->{isRating} = 1;
                }
                elsif ($attr->{class} =~ m/ds_omdomme_cust/)
                {
                    $self->{isRating} = 0;
                }
            }
            elsif ($tagname eq 'img')
            {
                if ($self->{isCover} && (!$self->{curInfo}->{image}))
                {
                    $self->{curInfo}->{image} = $attr->{src};

                    if ($self->{bigPics})
                    {
                        $self->{curInfo}->{image} =~ s|front_normal|front_large|;
                        $self->{curInfo}->{backpic} = $self->{curInfo}->{image};
                        $self->{curInfo}->{backpic} =~ s|front_large|back_large|;
                    }
                }
                elsif ($self->{isRating})
                {
                    # A full star counts 2, a half star 1. The dots are
                    # escaped so that only the literal file names match.
                    $self->{curInfo}->{ratingpress} += 2
                      if ($attr->{src} =~ m/rate_big_1\.gif/);
                    $self->{curInfo}->{ratingpress} += 1
                      if ($attr->{src} =~ m/rate_big_05\.gif/);
                }
            }
            elsif ($tagname eq 'script')
            {
                $self->{isSynopsis} = 0;
            }
            elsif ($tagname eq 'br')
            {
                $self->{curInfo}->{synopsis} .= "\n"
                  if ($self->{isSynopsis} && $self->{curInfo}->{synopsis});
            }
        }
    }

    # Closing-tag callback: maintains the nesting counters, leaves cover
    # mode at the end of any div and turns paragraph ends into newlines
    # while the synopsis is being collected.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;

        if ($tagname eq 'div')
        {
            $self->{isCover} = 0;
        }
        elsif ($tagname eq 'p')
        {
            $self->{curInfo}->{synopsis} .= "\n"
              if ($self->{isSynopsis} && $self->{curInfo}->{synopsis});
        }
    }

    # Text-node callback. Surrounding whitespace is stripped and empty
    # nodes are ignored.
    sub text
    {
        my ($self, $origtext) = @_;

        $origtext =~ s/^\s*//;
        $origtext =~ s/\s*$//;
        return if !$origtext;

        if ($self->{parsingList})
        {
            # evaluate the search result page
            return if !$self->{isMovie};

            my $item = $self->{itemsList}[ $self->{itemIdx} ] ||= {};
            if ($self->{inside}->{b})
            {
                $item->{title} = $origtext;
            }
            elsif ($self->{inside}->{div})
            {
                # Expected shape: "... <year> med <actor> och <actor> ...".
                # Captures are only read after a successful match.
                if ($origtext =~ /^.*?(\d{4}) +(med +([^-.]*))?/)
                {
                    $item->{date}   = $1;
                    $item->{actors} = $3;
                }
                elsif ($origtext =~ /med +([^-.]*)/)
                {
                    $item->{actors} = $1
                      if $1;
                }
                $item->{actors} =~ s/ och/,/g
                  if defined $item->{actors};
                $self->{isMovie} = 0;
            }
        }
        else
        {
            return if $self->{parsingEnded};
            # evaluate the film details page
            if ($self->{inside}->{h3})
            {
                if ($origtext eq "Filmens handling")
                {
                    $self->{isSynopsis} = 1;
                }
            }
            elsif ($self->{isSynopsis})    # important: elsif, not only if!
            {
                $self->{curInfo}->{synopsis} .= $origtext;
            }
            elsif ($self->{isItem} == 1)
            {
                # Label cell: remember it for the value cell that follows.
                $self->{key} = $origtext;
            }
            elsif ($self->{isItem} == 2)
            {
                my $info = $self->{curInfo};
                my $key  = $self->{key};

                if (   ($key eq "Grupp:")
                    or ($key eq "Genre:")
                    or ($key eq "Underkategori:"))
                {
                    $origtext =~ s| *film$||i;    # remove the trailing "film"
                    $origtext =~ s|/|,|g;         # "A/B/C" lists every genre

                    # Compare against whole comma-terminated entries and
                    # quote the page text: it is not a regex, and an empty
                    # pattern would silently reuse the previous one.
                    my $known = defined $info->{genre} ? $info->{genre} : '';
                    $info->{genre} .= $origtext . ","
                      if length($origtext)
                      && $known !~ m/(?:^|,)\Q$origtext\E,/;
                }
                elsif ($key eq "Speltid:")
                {
                    $info->{time} = $origtext;
                }
                elsif ($key eq "Svensk titel:")
                {
                    $info->{title} = $origtext;
                }
                elsif ($key eq "Originaltitel:")
                {
                    $info->{original} = $origtext;
                }
                elsif ($key eq "Produktionsland:")
                {
                    $info->{country} .= ", " if $info->{country};
                    $info->{country} .= $origtext;
                }
                elsif ($key =~ m/Premi.*r:/)
                {
                    $info->{date} = $origtext;
                }
                elsif ($key eq "Regi:")
                {
                    # Append (the original assigned here, which dropped
                    # every director but the last one).
                    $info->{director} .= ", " if $info->{director};
                    $info->{director} .= $origtext;
                }
                elsif ($key =~ m/despelare:$/)
                {
                    push @{ $info->{actors} }, [$origtext]
                      if ($self->{actorsCounter} <
                        $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
                    $self->{actorsCounter}++;
                }
                elsif ($key =~ m/ldersgr.*ns:/)
                {
                    $info->{"age"} = $1
                      if $origtext =~ m/^(\d+) /;
                }
            }
        }
    }

    # Constructor: builds the base object and declares which columns the
    # search result list provides.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 1,
            age      => 1,
        };

        $self->{isInfo}   = 0;
        $self->{isRating} = 0;
        $self->{isCover}  = 0;

        return $self;
    }

    # Resets the per-page parser state and returns the HTML unchanged.
    # NOTE(review): presumably invoked by the base class once per fetched
    # page before parsing - confirm. The actor counter is reset here so the
    # MAX_ACTORS cap applies per film instead of per plugin instance.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded}  = 0;
        $self->{isTitle}       = 0;
        $self->{isSynopsis}    = 0;
        $self->{actorsCounter} = 0;

        return $html;
    }

    # Returns the search URL for $word (inserted as given, no escaping).
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.discshop.se/shop/search_solr.php?lang=&cont=ds&"
          . "soktext=$word&subsite_set=movies&lang=se&subsite=bluray&&ref=";
    }

    # Turns the relative link stored in the list into an absolute URL.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return 'http://www.discshop.se/shop/' . $url;
    }

    # URLs need no rewriting for this site.
    sub changeUrl
    {
        my ($self, $url) = @_;

        return $url;
    }

    # Display name of the plugin.
    sub getName
    {
        return "Discshop.se";
    }

    # Character set reported for the site's pages.
    sub getCharset
    {
        my $self = shift;

        return "Windows-1252";
    }

    # Plugin author.
    sub getAuthor
    {
        return 'Tian';
    }

    # Language code of the site.
    sub getLang
    {
        return 'SV';
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCDoubanfilm.pm b/lib/gcstar/GCPlugins/GCfilms/GCDoubanfilm.pm new file mode 100644 index 0000000..e3e8563 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCDoubanfilm.pm @@ -0,0 +1,255 @@ +package GCPlugins::GCfilms::GCDoubanfilm; + +################################################### +# +# Copyright 2005-2010 Bai Wensimi +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    # Film plugin for the douban.com XML API.
    #
    # Unlike the HTML based plugins this one overrides parse() and reads the
    # whole response with XML::Simple. $self->{parsingList} selects between
    # filling $self->{itemsList} (search) and $self->{curInfo} (one film).
    package GCPlugins::GCfilms::GCPluginDoubanfilm;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
    use XML::Simple;
    use Encode;
    use LWP::Simple qw($ua);

    # Returns the children held by an XML::Simple node as a flat list.
    # XML::Simple yields an array ref for repeated elements but a plain
    # value for a single one; both are accepted, undef gives an empty list.
    sub _nodeList
    {
        my ($node) = @_;

        return () if !defined $node;
        return @{$node} if ref($node) eq 'ARRAY';
        return ($node);
    }

    # Joins the <name> of every <author> element with commas.
    sub _authorNames
    {
        my ($authors) = @_;

        my @names;
        foreach my $author (_nodeList($authors))
        {
            next if ref($author) ne 'HASH';
            my $name = $author->{'name'};
            push @names, $name if defined($name) && !ref($name) && $name ne '';
        }
        return join(',', @names);
    }

    # Appends one entry (a feed <entry> or a stand-alone document) to the
    # result list: url, title, director, country and date.
    sub _addListItem
    {
        my ($self, $entry) = @_;

        return if ref($entry) ne 'HASH';

        $self->{itemIdx}++;
        my $item = $self->{itemsList}[ $self->{itemIdx} ] ||= {};
        $item->{'url'}   = $entry->{'id'};
        $item->{'title'} = $entry->{'title'};

        my $directors = _authorNames($entry->{'author'});
        $item->{'director'} = $directors if $directors ne '';

        foreach my $attribute (_nodeList($entry->{'db:attribute'}))
        {
            next if ref($attribute) ne 'HASH';
            my $name = $attribute->{'name'};
            next if !defined $name;

            if ($name eq 'country')
            {
                $item->{'country'} = $attribute->{'content'};
            }
            elsif ($name eq 'pubdate')
            {
                $item->{'date'} = $attribute->{'content'};
            }
        }
        return;
    }

    # Parses an API response.
    #   $page - response body as a string
    # Responses starting with "bad imdb" or "The" are treated as plain-text
    # error messages and ignored. The original joined the two tests with a
    # bitwise "&", which can never be true for two differently anchored
    # prefixes, so error pages reached the XML parser.
    sub parse
    {
        my ($self, $page) = @_;
        return if !defined $page;
        return if (($page =~ /^bad imdb/) || ($page =~ /^The/));
        my $xml;
        my $xs = XML::Simple->new;

        if ($self->{parsingList})
        {
            $xml = $xs->XMLin(
                $page,
                forceArray => ['author'],
                KeyAttr    => ['']
            );

            if ($page =~ /feed>$/)
            {
                # Title search: a feed with any number of entries.
                foreach my $ItemMovie (_nodeList($xml->{'entry'}))
                {
                    $self->_addListItem($ItemMovie);
                }
            }
            else
            {
                # Lookup by id: the document itself is the only entry.
                $self->_addListItem($xml);
            }
        }
        else
        {
            $xml = $xs->XMLin($page,
                ForceArray => [ 'author' ],
                KeyAttr    => {'db:tag' => 'name', 'link' => 'rel'});

            my $info = $self->{curInfo} ||= {};

            # The separator is decided on curInfo itself (the original
            # looked at the list entry and appended the comma there).
            my $directors = _authorNames($xml->{'author'});
            if ($directors ne '')
            {
                $info->{director} .= ','
                  if defined($info->{director}) && $info->{director} ne '';
                $info->{director} .= $directors;
            }

            $info->{title}    = $xml->{'title'};
            $info->{original} = $xml->{'title'};
            $info->{webPage}  = $xml->{'link'}->{'alternate'}->{'href'};
            $info->{synopsis} = $xml->{'summary'};

            foreach my $attribute (_nodeList($xml->{'db:attribute'}))
            {
                next if ref($attribute) ne 'HASH';
                my $name = $attribute->{'name'};
                next if !defined $name;

                if ($name eq 'country')
                {
                    $info->{country} = $attribute->{'content'};
                }
                elsif ($name eq 'pubdate')
                {
                    $info->{date} = $attribute->{'content'};
                }
                elsif ($name eq 'cast')
                {
                    $info->{actors} .= ','
                      if defined($info->{actors}) && $info->{actors} ne '';
                    $info->{actors} .= $attribute->{'content'};
                }
            }

            # Ask for the large picture instead of the small one.
            my $tmp_image = $xml->{'link'}->{'image'}->{'href'};
            $tmp_image =~ s/spic/lpic/ if defined $tmp_image;
            $info->{image} = $tmp_image;
        }
        return;
    }

    # Constructor: builds the base object and declares which columns the
    # search result list provides.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            title    => 1,
            director => 1,
            date     => 1,
            country  => 1,
        };

        return $self;
    }

    # The response needs no clean-up before parsing.
    sub preProcess
    {
        my ($self, $html) = @_;

        return $html;
    }

    # Returns the API URL for $word: a direct lookup when searching by
    # imdb id, a title query otherwise.
    sub getSearchUrl
    {
        my ($self, $word) = @_;
        if ($self->{searchField} eq 'imdb')
        {
            return "http://api.douban.com/movie/subject/imdb/" .$word;
        }
        else
        {
            return "http://api.douban.com/movie/subjects?q=" .$word;
        }

    }

    # Entry ids returned by the API are already complete URLs.
    sub getItemUrl
    {
        my ($self, $url) = @_;
        return $url;
    }

    sub changeUrl
    {
        my ($self, $url) = @_;
        # Make sure the url is for the api, not the main movie page
        return $self->getItemUrl($url);
    }

    # A single request is enough for a search.
    sub getNumberPasses
    {
        return 1;
    }

    # Display name of the plugin.
    sub getName
    {
        return "豆瓣";
    }


    # Compares the "lid" query parameter of $url with siteLanguageCode().
    # NOTE(review): siteLanguageCode() is not defined in this package and
    # is presumably inherited - confirm before relying on this method.
    sub testURL
    {
        my ($self, $url) = @_;
        $url =~ /[\?&]lid=([0-9]+)*/;
        my $id = $1;
        return ($id == $self->siteLanguageCode());
    }

    # Re-declares the columns provided by the result list.
    sub getReturnedFields
    {
        my $self = shift;

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 1,
            country  => 1,
        };

    }

    # Plugin author.
    sub getAuthor
    {
        return 'BW';
    }

    # Language code of the site.
    sub getLang
    {
        return 'ZH';
    }

    # Marks the plugin as preferred.
    sub isPreferred
    {
        return 1;
    }

    sub getSearchCharset
    {
        my $self = shift;

        # Need urls to be double character encoded
        return "UTF-8";
    }

    # Fields the user may search by.
    sub getSearchFieldsArray
    {
        return ['imdb', 'title'];
    }

    # Character set of the API responses.
    sub getCharset
    {
        my $self = shift;

        return "UTF-8";
    }

    # HTML entities must not be decoded in the response.
    sub decodeEntitiesWanted
    {
        return 0;
    }

    # Language of the site content.
    sub siteLanguage
    {
        my $self = shift;

        return 'ZH';
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityEN.pm b/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityEN.pm new file mode 100644 index 0000000..2774b36 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityEN.pm @@ -0,0 +1,334 @@ +package GCPlugins::GCfilms::GCFilmAffinityEN; + +################################################### +# +# Copyright 2005-2007 Tian +# Edited 2009 by FiXx +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginFilmAffinityEN; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($self->{parsingEnded}) + { + if ( ($tagname eq 'a') + && ($attr->{href} =~ /\/en\/.*\.php\?movie_id=([0-9]*)/)) + { + $self->{hasUrl} = 'film' . $1 . '.html'; + } + } + elsif (!$self->{isMovie} + && ($tagname eq 'a') + && ($attr->{href} =~ /^\/en\/(film.*)$/)) + { + my $url = $1; + $self->{isMovie} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + } + elsif (($tagname eq 'span') + && ($attr->{style} eq 'font-size: 10px; color:#666666')) + { + $self->{isDirector} = 1; + } + elsif (($tagname eq 'div') + && ($attr->{style} eq 'font-size: 10px')) + { + $self->{isActors} = 1; + } + } + else + { + if ( ($tagname eq 'span') + && ($attr->{style} eq 'color:#990000; font-size:16; font-weight: bold;')) + { + $self->{isTitle} = 1; + } + elsif ($tagname eq 'img') + { + if ($attr->{src} =~ /^\/imgs\/countries/) + { + $self->{curInfo}->{country} = $attr->{title}; + } + elsif ($attr->{src} =~ /pics.*filmaffinity\.com\/.*-full\.jpg/) + { + $self->{curInfo}->{image} = $attr->{src} + if not exists $self->{curInfo}->{image}; + } + } + elsif ($tagname eq 'a') + { + if ($attr->{href} =~ /pics.*filmaffinity\.com\/.*-large\.jpg/) 
+ { + $self->{curInfo}->{image} = $attr->{href}; + } + } + elsif ($tagname eq 'td') + { + if ($attr->{style} =~ /font-size:22px; font-weight: bold;/) + { + $self->{isRating} = 1; + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{parsingEnded}) + { + if ($self->{hasUrl}) + { + $self->{itemsList}[0]->{url} = $self->{hasUrl}; + $self->{hasUrl} = 0; + } + return; + } + if ($self->{inside}->{title} && ($origtext !~ /^Search\s+for /)) + { + $self->{parsingEnded} = 1; + $self->{hasUrl} = 0; + $self->{itemIdx} = 0; + } + elsif ($self->{isMovie}) + { + return if $origtext !~ /\w/; + return if $origtext eq 'Add to lists'; + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext; + $self->{isMovie} = 0; + $self->{isTitle} = 1; + } + elsif ($self->{isTitle}) + { + (my $year = $origtext) =~ s/\s*\(([0-9]{4})\)\s*/$1/; + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $year; + $self->{isTitle} = 0; + } + elsif ($self->{isDirector}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext; + $self->{isDirector} = 0; + } + elsif ($self->{isActors}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $origtext; + $self->{isActors} = 0; + } + } + else + { + $origtext =~ s/^\s*//; + + return if !$origtext; + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0; + } + elsif ($self->{isOrig}) + { + $self->{curInfo}->{original} = $origtext; + $self->{isOrig} = 0; + } + elsif ($self->{isDate}) + { + $self->{curInfo}->{date} = $origtext; + $self->{isDate} = 0; + } + elsif ($self->{isTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{isTime} = 0; + } + elsif ($self->{isDirector}) + { + $self->{curInfo}->{director} = $origtext; + $self->{isDirector} = 0; + } + elsif ($self->{isActors}) + { + if ($self->{inside}->{a} && $origtext) + { + $origtext =~ s/\n//g; + 
$self->{curInfo}->{actors} .= $origtext . ', '; + } + } + elsif ($self->{isGenre}) + { + $self->{curInfo}->{genre} = $origtext; + $self->{curInfo}->{genre} =~ s/\s*\/\s*/,/g; + $self->{isGenre} = 0; + } + elsif ($self->{isSynopsis}) + { + $self->{curInfo}->{synopsis} = $origtext; + $self->{isSynopsis} = 0; + } + elsif ($self->{isRating}) + { + $origtext =~ s/,/\./; # replace comma + $self->{curInfo}->{ratingpress} = int($origtext + 0.5); + $self->{isRating} = 0; + } + + if ($self->{inside}->{b}) + { + if ($origtext eq 'ORIGINAL TITLE') + { + $self->{isOrig} = 1; + } + elsif ($origtext eq 'YEAR') + { + $self->{isDate} = 1; + } + elsif ($origtext eq 'RUNNING TIME') + { + $self->{isTime} = 1; + } + elsif ($origtext eq 'DIRECTOR') + { + $self->{isDirector} = 1; + } + elsif ($origtext eq 'CAST') + { + $self->{isActors} = 1; + } + elsif ($origtext eq 'STUDIO/PRODUCER') + { + $self->{curInfo}->{actors} =~ s/, $//; + $self->{isActors} = 0; + } + elsif ($origtext eq 'GENRE') + { + $self->{isGenre} = 1; + } + elsif ($origtext eq 'SYNOPSIS/PLOT') + { + $self->{isSynopsis} = 1; + } + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 1, + actors => 1, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.filmaffinity.com/en/search.php?" + ."stext=$word&stype=title"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return 'http://www.filmaffinity.com/en/' . 
$url; + } + + sub changeUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return "Film affinity (EN)"; + } + + sub getCharset + { + my $self = shift; + + return "ISO-8859-1"; + } + + sub getAuthor + { + return 'Tian & PIN edited by FiXx'; + } + + sub getLang + { + return 'EN'; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityES.pm b/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityES.pm new file mode 100644 index 0000000..4c39ae5 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCFilmAffinityES.pm @@ -0,0 +1,334 @@ +package GCPlugins::GCfilms::GCFilmAffinityES; + +################################################### +# +# Copyright 2005-2007 Tian +# Edited 2009 by FiXx +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginFilmAffinityES; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($self->{parsingEnded}) + { + if ( ($tagname eq 'a') + && ($attr->{href} =~ /\/es\/.*\.php\?movie_id=([0-9]*)/)) + { + $self->{hasUrl} = 'film' . $1 . 
'.html'; + } + } + elsif (!$self->{isMovie} + && ($tagname eq 'a') + && ($attr->{href} =~ /^\/es\/(film.*)$/)) + { + my $url = $1; + $self->{isMovie} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + } + elsif (($tagname eq 'span') + && ($attr->{style} eq 'font-size: 10px; color:#666666')) + { + $self->{isDirector} = 1; + } + elsif (($tagname eq 'div') + && ($attr->{style} eq 'font-size: 10px')) + { + $self->{isActors} = 1; + } + } + else + { + if ( ($tagname eq 'span') + && ($attr->{style} eq 'color:#990000; font-size:16; font-weight: bold;')) + { + $self->{isTitle} = 1; + } + elsif ($tagname eq 'img') + { + if ($attr->{src} =~ /^\/imgs\/countries/) + { + $self->{curInfo}->{country} = $attr->{title}; + } + elsif ($attr->{src} =~ /pics.*filmaffinity\.com\/.*-full\.jpg/) + { + $self->{curInfo}->{image} = $attr->{src} + if not exists $self->{curInfo}->{image}; + } + } + elsif ($tagname eq 'a') + { + if ($attr->{href} =~ /pics.*filmaffinity\.com\/.*-large\.jpg/) + { + $self->{curInfo}->{image} = $attr->{href}; + } + } + elsif ($tagname eq 'td') + { + if ($attr->{style} =~ /font-size:22px; font-weight: bold;/) + { + $self->{isRating} = 1; + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{parsingEnded}) + { + if ($self->{hasUrl}) + { + $self->{itemsList}[0]->{url} = $self->{hasUrl}; + $self->{hasUrl} = 0; + } + return; + } + if ($self->{inside}->{title} && ($origtext !~ /^Búsqueda\s+de /)) + { + $self->{parsingEnded} = 1; + $self->{hasUrl} = 0; + $self->{itemIdx} = 0; + } + elsif ($self->{isMovie}) + { + return if $origtext !~ /\w/; + return if $origtext eq 'Añadir a listas'; + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext; + $self->{isMovie} = 0; + $self->{isTitle} = 1; + } + elsif ($self->{isTitle}) + { + (my $year = $origtext) =~ s/\s*\(([0-9]{4})\)\s*/$1/; + 
$self->{itemsList}[ $self->{itemIdx} ]->{date} = $year; + $self->{isTitle} = 0; + } + elsif ($self->{isDirector}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext; + $self->{isDirector} = 0; + } + elsif ($self->{isActors}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $origtext; + $self->{isActors} = 0; + } + } + else + { + $origtext =~ s/^\s*//; + + return if !$origtext; + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0; + } + elsif ($self->{isOrig}) + { + $self->{curInfo}->{original} = $origtext; + $self->{isOrig} = 0; + } + elsif ($self->{isDate}) + { + $self->{curInfo}->{date} = $origtext; + $self->{isDate} = 0; + } + elsif ($self->{isTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{isTime} = 0; + } + elsif ($self->{isDirector}) + { + $self->{curInfo}->{director} = $origtext; + $self->{isDirector} = 0; + } + elsif ($self->{isActors}) + { + if ($self->{inside}->{a} && $origtext) + { + $origtext =~ s/\n//g; + $self->{curInfo}->{actors} .= $origtext . 
', '; + } + } + elsif ($self->{isGenre}) + { + $self->{curInfo}->{genre} = $origtext; + $self->{curInfo}->{genre} =~ s/\s*\/\s*/,/g; + $self->{isGenre} = 0; + } + elsif ($self->{isSynopsis}) + { + $self->{curInfo}->{synopsis} = $origtext; + $self->{isSynopsis} = 0; + } + elsif ($self->{isRating}) + { + $origtext =~ s/,/\./; # replace comma + $self->{curInfo}->{ratingpress} = int($origtext + 0.5); + $self->{isRating} = 0; + } + + if ($self->{inside}->{b}) + { + if ($origtext eq 'TÍTULO ORIGINAL') + { + $self->{isOrig} = 1; + } + elsif ($origtext eq 'AÑO') + { + $self->{isDate} = 1; + } + elsif ($origtext eq 'DURACIÓN') + { + $self->{isTime} = 1; + } + elsif ($origtext eq 'DIRECTOR') + { + $self->{isDirector} = 1; + } + elsif ($origtext eq 'REPARTO') + { + $self->{isActors} = 1; + } + elsif ($origtext eq 'PRODUCTORA') + { + $self->{curInfo}->{actors} =~ s/, $//; + $self->{isActors} = 0; + } + elsif ($origtext eq 'GÉNERO') + { + $self->{isGenre} = 1; + } + elsif ($origtext eq 'SINOPSIS') + { + $self->{isSynopsis} = 1; + } + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 1, + actors => 1, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.filmaffinity.com/es/search.php?" + ."stext=$word&stype=title"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return 'http://www.filmaffinity.com/es/' . 
$url; + } + + sub changeUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return "Film affinity (ES)"; + } + + sub getCharset + { + my $self = shift; + + return "ISO-8859-1"; + } + + sub getAuthor + { + return 'Tian & PIN edited by FiXx'; + } + + sub getLang + { + return 'ES'; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCFilmUP.pm b/lib/gcstar/GCPlugins/GCfilms/GCFilmUP.pm new file mode 100644 index 0000000..8a47dff --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCFilmUP.pm @@ -0,0 +1,252 @@ +package GCPlugins::GCfilms::GCFilmUP; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +#use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginFilmUP; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub getSearchUrl + { + my ($self, $word) = @_; + my $url; + + $url = + "http://filmup.leonardo.it/cgi-bin/search.cgi?" + . "ps=10&fmt=long&q=$word" + . 
"&ul=%25%2Fsc_%25&x=52&y=7&m=all&wf=2221&wm=wrd&sy=0"; + + return $url; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url unless $url eq ''; + return 'http://filmup.leonardo.it/'; + } + + sub getName + { + return "FilmUP"; + } + + sub getAuthor + { + return 'Tian'; + } + + sub getLang + { + return 'IT'; + } + + sub changeUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq 'a') + { + if ($self->{insideInfos}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $self->{lasUrl}; + $self->{insideInfos} = 0; + } + + $self->{lasUrl} = $attr->{href}; + } + } + else + { + if ($tagname eq 'img') + { + $self->{curInfo}->{image} = $self->getItemUrl . $attr->{src} + if $attr->{src} =~ /^locand\// && ($attr->{src} ne 'locand/no.gif'); + } + } + } + + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{inside}->{dt} && $self->{inside}->{a}) + { + if ($origtext =~ m/FilmUP - Scheda: (.*)/) + { + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $1; + $self->{insideInfos} = 1; + } + } + if ( $self->{inside}->{small} + && $self->{inside}->{table} + && $self->{insideInfos}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1 + if $origtext =~ /Anno: ([0-9]+)/; + $self->{itemsList}[ $self->{itemIdx} ]->{director} = $1 + if $origtext =~ /Regia: (.*?)((Sito)|$)/; + $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $1 + if $origtext =~ /Cast: (.*?)$/; + } + } + else + { + if ($self->{inside}->{h1}) + { + $self->{curInfo}->{title} = $origtext; + } + elsif ($self->{inside}->{td} && ($origtext !~ /^[\r\n]+$/)) + { + $self->{insideTime} = 0 if $origtext =~ /Regia:/; + if ($self->{insideOriginal}) + { + $self->{curInfo}->{original} = $origtext; + 
$self->{insideOriginal} = 0; + } + elsif ($self->{insideNat}) + { + $self->{curInfo}->{country} = $origtext; + $self->{insideNat} = 0; + } + elsif ($self->{insideDate}) + { + $self->{curInfo}->{date} = $origtext; + $self->{insideDate} = 0; + } + elsif ($self->{insideGenre}) + { + if (!$self->{curInfo}->{genre}) + { + $origtext =~ s|/|,|; + $self->{curInfo}->{genre} = $origtext; + } + $self->{insideGenre} = 0; + } + elsif ($self->{insideTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{insideTime} = 0; + } + elsif ($self->{insideDirector}) + { + $self->{curInfo}->{director} = $origtext; + $self->{insideDirector} = 0; + } + elsif ($self->{insideActors}) + { + $self->{curInfo}->{actors} = $origtext; + $self->{insideActors} = 0; + } + + $self->{insideOriginal} = 1 if $origtext =~ /Titolo originale:/; + $self->{insideNat} = 1 if $origtext =~ /Nazione:/; + $self->{insideDate} = 1 if $origtext =~ /Anno:/; + $self->{insideGenre} = 1 if $origtext =~ /Genere:/; + $self->{insideTime} = 1 if $origtext =~ /Durata:/; + $self->{insideDirector} = 1 if $origtext =~ /Regia:/; + $self->{insideActors} = 1 if $origtext =~ /Cast:/; + } + if ($self->{inside}->{synopsis}) + { + $self->{curInfo}->{synopsis} = $origtext; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + + $self->{hasField} = { + title => 1, + date => 1, + director => 1, + actors => 1, + }; + + bless($self, $class); + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $html =~ s/\222/'/g; + + $html =~ s{<font face="arial, helvetica" size="3">(.*?)</font>} + {<h1>$1</h1>}g; + $html =~ s{</table>.<br>.<font face="arial, helvetica" size="2">(.*?)</font>} + {</table><synopsis>$1</synopsis>}ms; + $html =~ s{<font face="arial, helvetica" size="2">Trama:(.*?)</font>} + {<synopsis>$1</synopsis>}; + $html =~ s{Trama:<br>}{}; + $html =~ s{<span .*?>|</span>} {}g; + $html =~ s{<a .*?href="\/?personaggi.*?>(.+?)</a>} {$1}g; + + $html 
=~ s{<font .*?>|</font>} {}g; + $html =~ s{</?b>} {}g; + + return $html; + } + + sub getCharset + { + my $self = shift; + + return "Windows-1252"; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCFilmWeb.pm b/lib/gcstar/GCPlugins/GCfilms/GCFilmWeb.pm new file mode 100644 index 0000000..f7c18cd --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCFilmWeb.pm @@ -0,0 +1,369 @@ +package GCPlugins::GCfilms::GCFilmWeb; + +################################################### +# +# Copyright 2005-2010 Tian, Michael Mayer +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginFilmWeb; + + use LWP::Simple qw($ua); + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($self->{parsingEnded}) + { + if ( ($tagname eq 'input') + && ($attr->{name} eq 'id')) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{title} = ''; + $self->{itemsList}[0]->{url} = + 'http://www.filmweb.pl/Film?id=' . 
$attr->{value}; + } + } + + if ($tagname eq 'a') + { + if ($attr->{class} eq 'searchResultTitle') + { + $self->{isMovie} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href}; + } + elsif ($attr->{href} =~ m|/search/film\?countryIds=|) + { + $self->{isCountry} = 1; + } + } + elsif ($tagname eq 'span') + { + if ($attr->{class} eq 'searchResultDetails') + { + $self->{isYear} = 1; + } + } + } + else + { + return if ($self->{parsingEnded}); + + if ($tagname eq 'strong') + { + if ($attr->{class} eq "rating") + { + $self->{isRating} = 1; + } + } + elsif ($tagname eq 'div') + { + if ($attr->{class} eq "time") + { + $self->{isTime} = 1; + } + elsif ($attr->{class} eq "posterLightbox") + { + $self->{isImage} = 1; + } + elsif ($attr->{class} =~ /castListWrapper/) + { + $self->{isCast} = 1; + } + elsif ($attr->{class} =~ /additional-info/) + { + $self->{parsingEnded} = 1; + } + } + elsif ($tagname eq 'span') + { + if ($attr->{class} eq 'filmDescrBg') + { + $self->{isSynopsis} = 1; + } + } + elsif (($tagname eq 'a') && $self->{isImage}) + { + # big image + $self->{curInfo}->{image} = $attr->{href}; + } + elsif (($tagname eq 'img') && $self->{isImage}) + { + # small image + $self->{curInfo}->{image} = $attr->{src} + if (!$self->{bigPics}); + $self->{isImage} = 0; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + if ($tagname eq "tr") + { + $self->{key} = ""; + } + + } + + sub text + { + my ($self, $origtext) = @_; + + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + + return if !$origtext; + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + + if ($self->{isMovie}) + { + if ($self->{inside}->{a}) + { + my $title; + my $original; + ($title, $original) = split (/\s*\/\s*/, $origtext, 2); + return if !$title; + + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $title; + $self->{itemsList}[ $self->{itemIdx} ]->{original} = $original; + $self->{isMovie} = 0; + } + } + elsif 
($self->{isYear}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1 + if $origtext =~ /([0-9]{4})/; + $self->{isYear} = 0; + } + elsif ($self->{isCountry}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{country} .= + $self->{itemsList}[ $self->{itemIdx} ]->{country} ? + ", " . $origtext + : $origtext; + $self->{isCountry} = 0; + } + } + else + { + + if ($self->{inside}->{title}) + { + # content of title field is formatted like this: + # Obcy - 8. pasażer "Nostromo" / Alien (1979) - Filmweb + # or (if polish title and original title are identical): + # Batman (1989) - Filmweb + $origtext =~ m|(.*)\s+\((\d{4})\)\s+-\s+Filmweb|; + $self->{curInfo}->{date} = $2; + ($self->{curInfo}->{original}, + $self->{curInfo}->{title}) = split (/\s+\/\s+/, $1, 2); + if (!$self->{curInfo}->{title}) + { + $self->{curInfo}->{title} = $self->{curInfo}->{original}; + } + } + elsif ($self->{isRating}) + { + $origtext =~ s/,/\./; + $self->{curInfo}->{ratingpress} = int ($origtext + 0.5); + $self->{isRating} = 0; + } + elsif ($self->{isSynopsis}) + { + $self->{curInfo}->{synopsis} = $origtext; + $self->{isSynopsis} = 0; + } + elsif ($self->{inside}->{th}) + { + $self->{key} = $origtext; + } + elsif ($self->{inside}->{td} && $self->{inside}->{a}) + { + if ($self->{key} eq "reżyseria:") + { + $self->{curInfo}->{director} .= + $self->{curInfo}->{director} ? ", " . $origtext : $origtext; + } + if ($self->{key} eq "produkcja:") + { + $self->{curInfo}->{country} .= + $self->{curInfo}->{country} ? ", " . $origtext : $origtext; + } + if ($self->{key} eq "gatunek:") + { + $self->{curInfo}->{genre} .= + $self->{curInfo}->{genre} ? ", " . 
$origtext : $origtext; + } + } + elsif ($self->{isCast}) + { + if ($self->{inside}->{h3}) + { + push @{$self->{curInfo}->{actors}}, [$origtext] + if ($self->{actorsCounter} < + $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS); + $self->{actorsCounter}++; + $self->{isRole} = 1; + } + else + { + if ($self->{isRole} + && ($self->{actorsCounter} <= + $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS)) + { + # As we incremented it above, we have one more + # chance here to add a role Without <= we would skip + # the role for last actor + push @{$self->{curInfo}->{actors}->[ $self->{actorsCounter}-1 ]}, $origtext + } + $self->{isRole} = 0; + } + } + elsif ($self->{isTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{isTime} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + original => 1, + country => 1, + }; + + $self->{isMovie} = 0; + $self->{isYear} = 0; + $self->{isCountry} = 0; + $self->{curName} = undef; # why? + $self->{curUrl} = undef; # why? + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + + if ($self->{parsingList}) + { + $html =~ s|</?b>||gms; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + # Grab the home page first to receive a fresh, valid cookie + my $response = $ua->get('http://www.filmweb.pl/'); + + return "http://www.filmweb.pl/search?q=$word&alias=film"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url if $url =~ /^http:/; + return "http://www.filmweb.pl" . 
$url; + } + + sub changeUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return 'FilmWeb'; + } + + sub getExtra + { + return ''; + } + + + sub getCharset + { + my $self = shift; + + return 'ISO-8859-2'; + } + + sub getAuthor + { + return 'Tian'; + } + + sub getLang + { + return 'PL'; + } + + sub getDefaultPictureSuffix + { + return '.jpg'; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCIbs.pm b/lib/gcstar/GCPlugins/GCfilms/GCIbs.pm new file mode 100644 index 0000000..2cb141d --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCIbs.pm @@ -0,0 +1,409 @@ +package GCPlugins::GCfilms::GCIbs; +################################################### +# +# Copyright 2008 t-storm +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginIbs; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + return; + if ($tagname eq "a") + { + if ($attr->{href} =~ m/mymovies\/list\?pending\&add=([0-9]*)/) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = '/title/tt' . $1 . 
'/'; + } + } + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + my $url = $attr->{href}; + if ( ($url =~ /^http:\/\/www.ibs.it\/dvd\/[0-9]+\//) + && (!$self->{alreadyListed}->{$url})) + { + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + $self->{alreadyListed}->{$url} = 1; + } + } + elsif ($tagname eq 'td') + { + if ($attr->{class} eq 'ttitolettobianco') + { + $self->{isYear} = 1; + $self->{isMovie} = 0; + } + } + } + else + { + if ($tagname eq "a") + { + $self->{currentHref} = $attr->{href}; + + if ($attr->{href} =~ +m/javascript:Jackopen\('http:\/\/giotto.internetbookshop.it\/cop\/copdjc.asp\?e=([0-9]+)'\)/ + ) + { + $self->{curInfo}->{image} = + "http://giotto.internetbookshop.it/cop/copdjc.asp?e=$1"; + } + if ($attr->{href} =~ m/^\/film\/regista\//) + { + $self->{insideDirector} = 1; + } + elsif ($attr->{href} =~ m/^\/film\/attore\//) + { + $self->{insideActors} = 1; + $self->{insideRoles} = 0; + $self->{insideDirector} = 0; + } + else + { + $self->{insideSynopsis} = 0 if ($attr->{href} =~ m/plotsummary/); + $self->{insideGenre} = 1 + if ($attr->{href} =~ m|/Sections/Genres/|) + && !($self->{curInfo}->{synopsis} + || $self->{curInfo}->{country} + || $self->{curInfo}->{time}); + } + } + elsif ($tagname eq 'td') + { + if ($attr->{class} eq 'lbarrasup') + { + $self->{isMovie} = 1; + $self->{insideSynopsis} = 0; + } + } + elsif ($tagname eq "SPAN") + { + if ($self->{inside}->{langue}) + { + $self->{inside}->{langueLANG} = 1; + $self->{inside}->{langueCODEC} = 0; + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + if ($tagname eq "SPAN") + { + if ($self->{inside}->{langue}) + { + $self->{inside}->{langueLANG} = 0; + $self->{inside}->{langueCODEC} = 1; + } + } + + $self->{inside}->{$tagname}--; + + } + + sub text + { + my ($self, $origtext) = @_; + + return if length($origtext) < 2; + + $origtext =~ s/"/"/g; + $origtext =~ s/³/3/g; + $origtext =~ 
s/&#[0-9]*;//g; + $origtext =~ s/\n//g; + + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + if ($self->{isMovie}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext; + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $self->{listDate}; + $self->{isMovie} = 0; + $self->{isInfo} = 1; + return; + } + if ($self->{isYear}) + { + $origtext =~ /([0-9]+)/; + $self->{listDate} = $1; + $self->{isYear} = 0; + } + if ($self->{isDirector}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext; + $self->{isMovie} = 0; + $self->{isInfo} = 0; + $self->{isDirector} = 0; + return; + } + $self->{isDirector} = 1 if $origtext =~ m/Regia di /; + } + else + { + $self->{inside}->{langue} = 0 if $origtext =~ m/Lingua sottotitoli/; + if ($self->{insideGenre}) + { + $origtext =~ s/\s*$//; + $self->{curInfo}->{genre} .= $self->capWord($origtext) . ','; + $self->{curInfo}->{genre} =~ s|\s*/\s*|,|g; + $self->{insideGenre} = 0; + } + elsif ($self->{insideDirector}) + { + $self->{curInfo}->{director} = $origtext; + $self->{insideDirector} = 0; + } + elsif ($self->{insideSynopsis}) + { + ($self->{curInfo}->{synopsis} .= $origtext) =~ s/^\s*//; + $self->{insideSynopsis} = 0; + } + elsif ($self->{isCountry}) + { + $origtext =~ /(.+), (.+)/; + $self->{curInfo}->{country} .= $1; + $self->{curInfo}->{date} = $2; + $self->{isCountry} = 0; + } + elsif ($self->{insideTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{curInfo}->{time} =~ s/.*?://; + $self->{insideTime} = 0; + } + elsif ($self->{insideActors}) + { + push @{$self->{curInfo}->{actors}}, [$origtext] + if ($self->{actorsCounter} < + $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS); + $self->{actorsCounter}++; + $self->{insideActors} = 0; + } + elsif ($self->{insideRoles}) + { + # As we incremented it above, we have one more chance here to add a role + # Without <= we would skip the role for last actor + push @{$self->{curInfo}->{actors}->[ $self->{actorsCounter} - 1 ]}, + $origtext + 
if ($self->{actorsCounter} <= + $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS); + $self->{insideRoles} = 0; + } + elsif ($self->{inside}->{langue}) + { + if ($self->{inside}->{span}) + { + $self->{curInfo}->{language} = $origtext; + } + else + { + $origtext =~ s/^, //; + $origtext =~ s/ - $//; + push @{$self->{curInfo}->{audio}}, + [ $self->{curInfo}->{language}, $origtext ]; + } + } + elsif ($self->{inside}->{soustitre}) + { + my @sottotitoli = split(' - ', $origtext); + my $subss; + foreach $subss (@sottotitoli) + { + push @{$self->{curInfo}->{subt}}, [$subss]; + } + + $self->{inside}->{soustitre} = 0; + } + elsif ($self->{isMovie}) + { + + if ($self->{isMovie1}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isMovie1} = 0; + } + elsif ($self->{isMovie2}) + { + $self->{curInfo}->{original} = $origtext; + $self->{isMovie} = 0; + $self->{isMovie2} = 0; + } + } + else + { + if ($origtext =~ m{User\s+Rating:\s+(\d+\.\d+)/10\s+}) + { + $self->{curInfo}->{ratingpress} = int($1 + 0.5); + } + ; # if + } + ; # if + + if ($origtext eq "Titolo") + { + $self->{isMovie1} = 1; + $self->{isMovie2} = 0; + } + elsif ($origtext eq "Titolo originale") + { + $self->{isMovie1} = 0; + $self->{isMovie2} = 1; + } + elsif ($origtext eq "Paese, Anno") + { + $self->{isCountry} = 1; + } + elsif ($origtext eq "Dati tecnici") + { + $self->{insideTime} = 1; + } + elsif ($origtext eq "Genere") + { + $self->{insideGenre} = 1; + } + elsif ($origtext eq "Descrizione") + { + $self->{insideSynopsis} = 1; + } + elsif ($origtext =~ m/Vietato ai minori di ([0-9]+) anni/) + { + $self->{curInfo}->{age} = $1; + } + elsif ($origtext eq "Lingua audio") + { + $self->{inside}->{langue} = 1; + } + elsif ($origtext eq "Lingua sottotitoli") + { + $self->{inside}->{langue} = 0; + $self->{inside}->{soustitre} = 1; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 
1, + director => 1, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + $html =~ s/""/'"/g; + $html =~ s/""/"'/g; + $html =~ s|</a></b><br>|</a><br>|; + $html =~ s{</?(?:b|small)>}{}gi; + + if ($self->{parsingList}) + { + $self->{alreadyListed} = {}; + } + else + { + $html =~ s|<a href="synopsis">[^<]*</a>||gi; + $html =~ s|<a href="/name/.*?">([^<]*)</a>|$1|gi; + $html =~ s|<a href="/character/ch[0-9]*/">([^<]*)</a>|$1|gi; + #$html =~ s|<a href="/Sections/.*?">([^<]*)</a>|$1|gi; + $self->{curInfo}->{actors} = []; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.ibs.it/dvd/ser/serpge.asp?ty=kw&dh=100&SEQ=Q&T=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url if $url =~ /^http:/; + return "http://www.ibs.it" . $url; + } + + sub getName + { + return "Internet Bookshop"; + } + + sub getAuthor + { + return 't-storm'; + } + + sub getLang + { + return 'IT'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm b/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm new file mode 100644 index 0000000..70af804 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm @@ -0,0 +1,439 @@ +package GCPlugins::GCfilms::GCImdb; + +################################################### +# +# Copyright 2010 groms +# +# Features: +# + Multiple directors separated by comma +# + Multiple countries separated by comma +# + Correct URL in case of redirection +# + Fetches Original Title +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + package GCPlugins::GCfilms::GCPluginImdb; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + my $url = $attr->{href}; + if (($url =~ /^\/title\//) && (!$self->{alreadyListed}->{$url})) + { + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $url; + $self->{alreadyListed}->{$url} = 1; + } + } + } + else + { + + if ($tagname eq "link") + { + if ($attr->{rel} eq "canonical") + { + $self->{curInfo}->{webPage} = $attr->{href}; + } + } + elsif ($tagname eq "h1") + { + if ($attr->{class} eq "header") + { + $self->{insideHeader} = 1; + } + } + elsif ($tagname eq "div") + { + if ($attr->{class} eq "infobar") + { + $self->{insideInfobar} = 1; + } + } + elsif ($tagname eq "table") + { + if ($attr->{class} eq "cast_list") + { + $self->{insideCastList} = 1; + } + } + elsif ($tagname eq "span") + { + if ($attr->{itemprop} eq "ratingValue") + { + $self->{insideRating} = 1; + } + elsif ($attr->{class} eq "title-extra") + { + $self->{insideOriginalTitle} = 1; + } + } + elsif ($tagname eq "img") + { + if ($self->{insidePrimaryImage}) + { + if (!($attr->{src} =~ m/nopicture/)) + { + ($self->{curInfo}->{image} = $attr->{src}) =~ 
s/_V1\._.+\./_V1\._SX1000_SY1000_\./; + } + } + elsif ($self->{insideInfobar} && $attr->{src} =~ m|/certificates/us/|) + { + my $cert = $attr->{title}; + $self->{curInfo}->{age} = 1 if ($cert eq 'Unrated') || ($cert eq 'Open'); + $self->{curInfo}->{age} = 2 if ($cert eq 'G') || ($cert eq 'Approved'); + $self->{curInfo}->{age} = 5 if ($cert eq 'PG') || ($cert eq 'M') || ($cert eq 'GP'); + $self->{curInfo}->{age} = 13 if $cert eq 'PG_13'; + $self->{curInfo}->{age} = 17 if $cert eq 'R'; + $self->{curInfo}->{age} = 18 if ($cert eq 'NC_17') || ($cert eq 'X'); + } + } + elsif ($tagname eq "a") + { + if ($self->{insideHeader} && $attr->{href} =~ m/year/) + { + $self->{insideYear} = 1; + } + elsif ($self->{insideInfobar} && $attr->{href} =~ m/genre/) + { + $self->{insideGenre} = 1; + } + } + elsif ($tagname eq 'td') + { + if ($self->{insideCastList}) + { + if ($attr->{class} eq 'name') + { + $self->{insideActor} = 1; + } + elsif ($attr->{class} eq 'character') + { + $self->{insideRole} = 1; + } + } + elsif ($attr->{id} eq "img_primary") { + $self->{insidePrimaryImage} = 1; + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + if ($self->{parsingList}) + { + if ($self->{isMovie} && ($tagname eq 'a')) + { + $self->{isMovie} = 0; + my $url = $self->{itemsList}[$self->{itemIdx}]->{url}; + if (!$self->{itemsList}[$self->{itemIdx}]->{title}) + { + $self->{alreadyListed}->{$url} = 0; + $self->{itemIdx}--; + } + } + } else { + if ($tagname eq "h1") + { + $self->{insideHeader} = 0; + } + elsif ($tagname eq "a") + { + $self->{insideYear} = 0; + $self->{insideGenre} = 0; + $self->{insideActor} = 0; + $self->{insideRole} = 0; + } + elsif ($tagname eq "div") + { + $self->{insideInfobar} = 0; + $self->{insideNat} = 0; + $self->{insideDirector} = 0; + $self->{insideStoryline} = 0; + $self->{insideReleaseDate} = 0; + } + elsif ($tagname eq "span") + { + $self->{insideRating} = 0; + $self->{insideOriginalTitle} = 0; + } + elsif ($tagname eq 
"table") + { + $self->{insideCastList} = 0; + } + elsif ($tagname eq "td") + { + $self->{insidePrimaryImage} = 0; + } + elsif ($self->{insideCastList}) + { + if ($self->{actor} && $self->{role}) + { + $self->{actor} =~ s/^\s+|\s+$//g; + $self->{actor} =~ s/\s{2,}/ /g; + push @{$self->{curInfo}->{actors}}, [$self->{actor}]; + $self->{role} =~ s/^\s+|\s+$//g; + $self->{role} =~ s/\s{2,}/ /g; + push @{$self->{curInfo}->{actors}->[$self->{actorsCounter}]}, $self->{role}; + $self->{actorsCounter}++; + } + $self->{actor} = ""; + $self->{role} = ""; + } + } + } + + sub text + { + my ($self, $origtext) = @_; + + return if length($origtext) < 2; + + $origtext =~ s/^\s+|\s+$//g; + + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + if ($self->{inside}->{h1} && $origtext !~ m/IMDb\s*Title\s*Search/i) + { + $self->{parsingEnded} = 1; + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + if ($self->{isMovie}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isMovie} = 0; + $self->{isInfo} = 1; + return; + } + if ($self->{isInfo}) + { + $self->{itemsList}[$self->{itemIdx}]->{date} = $1 if $origtext =~ m|\(([0-9]*)(/I+)?\)|; + $self->{isInfo} = 0; + } + } + else + { + if ($self->{insideHeader}) + { + if ($self->{insideYear}) + { + $self->{curInfo}->{date} = $origtext; + } + elsif (!$self->{curInfo}->{title}) + { + $self->{curInfo}->{title} = $origtext; + if (!$self->{curInfo}->{original}) + { + $self->{curInfo}->{original} = $origtext; + } + } + elsif ($self->{insideOriginalTitle} && !$self->{inside}->{i}) + { + $self->{curInfo}->{original} = $origtext; + } + } + elsif ($self->{insideInfobar}) + { + if ($self->{insideGenre}) + { + if ($self->{curInfo}->{genre}) + { + $self->{curInfo}->{genre} .= ","; + } + $self->{curInfo}->{genre} .= $origtext; + } + elsif ($origtext =~ m/([0-9]+ min)/) + { + $self->{curInfo}->{time} = $1; + } + } + elsif ($self->{insideRating} && $origtext =~ m/[0-9]\.[0-9]/) + { + 
$self->{curInfo}->{ratingpress} = int($origtext + 0.5); + } + elsif ($self->{insideSynopsis}) + { + $self->{curInfo}->{synopsis} .= $origtext; + } + elsif ($self->{insideNat}) + { + if ($origtext =~ m/[^\s].+/) + { + if ($self->{curInfo}->{country} =~ m/.+/) + { + $self->{curInfo}->{country} .= ", ".$origtext; + } + else + { + $self->{curInfo}->{country} = $origtext; + } + } + } + elsif ($self->{insideCastList}) + { + if ($self->{insideActor}) + { + $self->{actor} .= $origtext; + } + elsif ($self->{insideRole}) + { + $self->{role} .= $origtext; + } + } + elsif ($self->{insideStoryline} && $self->{inside}{p}) + { + $self->{curInfo}->{synopsis} = $origtext; + $self->{insideStoryline} = 0; + } + elsif ($self->{insideDirector} && $self->{inside}->{div}) + { + $origtext =~ s/,/, /; + $self->{curInfo}->{director} .= $origtext; + } + elsif ($self->{insideReleaseDate} && !$self->{curInfo}->{date}) { + if ($origtext =~ m/([0-9]{4})/) + { + $self->{curInfo}->{date} = $1; + $self->{insideReleaseDate} = 0; + } + } + + if ($self->{inside}->{h2}) + { + $self->{insideStoryline} = 1 if ($origtext eq "Storyline"); + } + elsif ($self->{inside}->{h4}) + { + $self->{insideDirector} = 1 if $origtext =~ m/Directors?:/; + $self->{insideTime} = 1 if $origtext =~ m/Runtime:/; + $self->{insideNat} = 1 if $origtext =~ m/Country:/; + $self->{insideReleaseDate} = 1 if $origtext =~ m/Release Date:/; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + if ($self->{parsingList}) + { + $self->{alreadyListed} = {}; + } + else + { + #$html =~ s|<a href="synopsis">[^<]*</a>||gi; + #$html =~ s|<a 
href="/name/.*?"[^>]*>([^<]*)</a>|$1|gi; + #$html =~ s|<a href="/character/ch[0-9]*/">([^<]*)</a>|$1|gi; + #$html =~ s|<a href="/Sections/.*?">([^<]*)</a>|$1|gi; + + # Commented out this line, causes bug #14420 when importing from named lists + #$self->{curInfo}->{actors} = []; + } + + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.imdb.com/find?s=tt&q=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://www.imdb.com" if $url eq ""; + return $url if $url =~ /^http:/; + return "http://www.imdb.com".$url; + } + + sub getName + { + return "IMDb"; + } + + sub getAuthor + { + return 'groms'; + } + + sub getLang + { + return 'EN'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm b/lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm new file mode 100644 index 0000000..d950395 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm @@ -0,0 +1,386 @@ +package GCPlugins::GCfilms::GCKinopoisk; + +use strict; +use utf8; +use Encode qw(encode); + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + package GCPlugins::GCfilms::GCPluginKinopoisk; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + if ($self->{parsingEnded}) + { + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + if ($attr->{class} eq "all") + { + my $url = $attr->{href}; + if ($url =~ m/\/level\/1\/film/) + { + $self->{isMovie} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $url; + } + } + if ($attr->{class} eq "orange") + { + $self->{isYear} = 1; + } + } + elsif ($tagname eq "title") + { + $self->{insideHTMLtitle} = 1; + } + } + else + { + if ($attr->{class} eq "moviename-big" && $attr->{style} eq "margin: 0; padding: 0") + { + $self->{insideTitle} = 1; + } + elsif ($tagname eq "span") + { + if ($attr->{style} eq "color: #666; font-size: 13px") + { + 
$self->{insideOriginal} = 1; + } + elsif ($attr->{class} eq "_reachbanner_" && $self->{insideSynopsis} == 0) + { + $self->{insideSynopsis} = 1; + } + } + elsif ($tagname eq "a") + { + if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Byear\%5D/) + { + $self->{insideDate} = 1; + } + if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Bcountry\%5D/) + { + if ($self->{isCountry} >= 2) + { + $self->{insideCountry} = 1; + $self->{isCountry}++; + } + } + if ($attr->{href} =~ m/\/level\/4\/people/) + { + if ($self->{isDirector} >= 2) + { + $self->{insideDirector} = 1; + $self->{isDirector}++; + } + } + if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Bgenre\%5D/) + { + $self->{insideGenre} = 1; + $self->{isGenre}++; + } + if ($self->{insideActorList}) + { + $self->{isActors} += 1; + $self->{insideActors} = 1; + } + } + elsif ($tagname eq "td") + { + if ($attr->{class} eq "type") + { + $self->{isDirector} = 1; + $self->{isTime} = 1; + $self->{isCountry} = 1; + } + elsif ($self->{isTime} == 2) + { + $self->{insideTime} = 1; + $self->{isTime} = 0; + } + elsif ($attr->{style} eq "vertical-align: top; height: 15px" && $attr->{align} eq "right" && $self->{isActors} >= 0) + { + $self->{isActors} += 1; + $self->{insideActors} = 1; + } + } + elsif ($tagname eq "img" && $attr->{style} eq "border: none; border-left: 10px #f60 solid") + { + if ($attr->{src} ne "/images/image_none.gif") + { + $self->{curInfo}->{image} = "http://www.kinopoisk.ru".$attr->{src}; + } + } + } + } + + sub text + { + my ($self, $origtext) = @_; + return if ($self->{parsingEnded}); + if ($self->{parsingList}) + { + if (($self->{insideHTMLtitle})) + { + if ($origtext =~ m/Результаты\sпоиска/) + { + # + } + else + { + $self->{parsingEnded} = 1; + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + $self->{insideHTMLtitle} = 0; + } + if ($self->{isMovie}) + { + my ($title, $date); + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isMovie} = 0; + return; + } + elsif 
($self->{isYear}) + { + $self->{itemsList}[$self->{itemIdx}]->{date} = $origtext; + $self->{isYear} = 0; + return; + } + } + else + { + if ($origtext =~ m/В\s*главных\s*ролях:/) + { + $self->{insideActorList} = 1; + } + if ($origtext =~ m/Роли\s*дублировали:/) + { + $self->{insideActorList} = 0; + } + if ($self->{insideTitle}) + { + $origtext =~ s/\s+$//; + $self->{curInfo}->{title} = $origtext; + $self->{insideTitle} = 0; + } + elsif ($self->{insideOriginal}) + { + $origtext =~ s/^\s+//; + $self->{curInfo}->{original} = $origtext; + $self->{insideOriginal} = 0; + } + elsif ($self->{insideDate}) + { + $self->{curInfo}->{date} = $origtext; + $self->{insideDate} = 0; + } + elsif ($self->{insideCountry} == 1) + { + if ($self->{isCountry} == 3) + { + $self->{curInfo}->{country} = $origtext; + } + elsif ($self->{isCountry} > 3) + { + $self->{curInfo}->{country} = $self->{curInfo}->{country}.", ".$origtext; + } + $self->{insideCountry} = 0; + } + elsif ($self->{insideDirector}) + { + if ($self->{isDirector} == 3) + { + $self->{curInfo}->{director} = $origtext; + } + elsif ($self->{isDirector} > 3) + { + $self->{curInfo}->{director} = $self->{curInfo}->{director}.", ".$origtext; + } + $self->{insideDirector} = 0; + } + elsif ($self->{insideActors}) + { + if ($self->{isActors} == 1) + { + $self->{curInfo}->{actors} = $origtext; + } + elsif ($self->{isActors} > 1) + { + if ($origtext eq "...") + { + $self->{isActors} = -1; + } + else + { + $self->{curInfo}->{actors} = $self->{curInfo}->{actors}.", ".$origtext; + } + } + $self->{insideActors} = 0; + } + elsif ($self->{insideSynopsis} == 1) + { + #$origtext =~ s/^\s+//; + $self->{curInfo}->{synopsis} = $origtext; + $self->{insideSynopsis} = 2; + } + elsif ($self->{isTime} == 1 || $self->{isDirector} == 1 || $self->{isCountry} == 1) + { + $self->{isDirector} = 0; + $self->{isTime} = 0; + $self->{isCountry} = 0; + if ($origtext eq "время") + { + $self->{isTime} = 2; + } + elsif ($origtext eq "режиссер") + { + 
$self->{isDirector} = 2; + } + elsif ($origtext eq "страна") + { + $self->{isCountry} = 2; + } + } + elsif ($self->{insideTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{insideTime} = 0; + } + elsif ($self->{insideGenre}) + { + if ($self->{isGenre} == 1) + { + $self->{curInfo}->{genre} = $origtext; + } + elsif ($self->{isGenre} > 1) + { + $self->{curInfo}->{genre} = $self->{curInfo}->{genre}.", ".$origtext; + } + $self->{insideGenre} = 0; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + if ($self->{parsingList}) + { + # Your code for processing search results here + } + else + { + if ($tagname eq "tr" && $self->{isDirector} >= 2) + { + $self->{isDirector} = 0; + } + elsif ($tagname eq "tr" && $self->{isGenre} != 0) + { + $self->{isGenre} = 0; + } + elsif ($tagname eq "td") + { + $self->{insideActorList} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{isYear} = 0; + $self->{isDirector} = 0; + $self->{isActors} = 0; + $self->{isTime} = 0; + $self->{isGenre} = 0; + $self->{isCountry} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + $self->{insideActorList} = 0; + return $self; + } + + sub getName + { + return "Kinopoisk"; + } + + sub getAuthor + { + return 'Nazarov Pavel'; + } + + sub getLang + { + return 'RU'; + } + + sub getCharset + { + my $self = shift; + return "windows-1251"; + } + + sub getSearchCharset + { + my $self = shift; + return "windows-1251"; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + return "http://www.kinopoisk.ru/index.php?kp_query=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url =~ /^http:/; + return "http://www.kinopoisk.ru/" . 
$url; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + $html =~ s/…/\.\.\./g; + $html =~ s/\x92/'/g; + $html =~ s/\x93/“/g; + $html =~ s/\x94/”/g; + $html =~ s/—/—/g; + $html =~ s/""/'"/g; + $html =~ s/""/"'/g; + $html =~ s|</a></b><br>|</a><br>|; + $html =~ s/<br><br>/\x0A/g; + return $html; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCMediadis.pm b/lib/gcstar/GCPlugins/GCfilms/GCMediadis.pm new file mode 100644 index 0000000..4caf406 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCMediadis.pm @@ -0,0 +1,316 @@ +package GCPlugins::GCfilms::GCMediadis; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginMediadis; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + if ( ($attr->{href} =~ m|http://www\.mediadis\.com/video/detail\.asp|) + && ($attr->{class} eq 'a-blue')) + { + my $url = $attr->{href}; + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + } + elsif (($self->{couldBeCast}==1) + && ($attr->{href} =~ m|http://www\.mediadis\.com/products/search\.asp|)) + { + # yes, found the magic link. director(s) to follow. 
+ $self->{couldBeCast} = 2; + } + } + if (($tagname eq 'td') && ($attr->{class} eq 'search-list')) + { + if ($attr->{align} eq 'center') + { + $self->{couldBeYear} = 1; + } + if (($attr->{align} eq 'left') && ($attr->{colspan} eq '5')) + { + $self->{couldBeCast} = 1; + } + } + } + else + { + if ($tagname eq "img") + { + if ($attr->{src} =~ /^http:\/\/www\.(dvdzone2|mediadis)\.com\/pictures\/big\//) + { + $self->{curInfo}->{image} = $attr->{src}; + } + } + elsif ($tagname eq "p") + { + $self->{insideSynopsis} = 1; + } + elsif ($tagname eq "span") + { + if (($attr->{class} eq "detail-title")) + { + $self->{insideName} = 1; + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + if ($self->{parsingList}) + { + if ($tagname eq 'tr') { + $self->{couldBeCast} = 0; + } + } + } + + sub text + { + my ($self, $origtext) = @_; + + return if length($origtext) < 2; + + if ($self->{parsingList}) + { + $origtext =~ s/^\s*(\S*)\s*$/$1/; # remove surrouding whitespace + + if ($self->{isMovie}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $self->capWord($origtext); + $self->{isMovie} = 0; + $self->{isInfo} = 1; + return; + } + elsif ($self->{couldBeYear}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1 + if $origtext =~ m/([0-9]{4})/; + $self->{couldBeYear} = 0; + } + elsif ($self->{couldBeCast} == 2) # waiting for director name + { + if ($origtext eq "-") + { + $self->{couldBeCast} = 3; # read actors now + } + elsif (!$self->{itemsList}[ $self->{itemIdx} ]->{director}) + { + # revert the failed name transposure done my mediadis: + $origtext =~ s/^(.*) (\S+)$/$2 $1/; + # only one entry, no list. 
+ $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext; + } + } + elsif ($self->{couldBeCast} == 3) # waiting for actors names + { + if ($origtext) + { + # revert the failed name transposure done my mediadis: + $origtext =~ s/^(.*) (\S+)$/$2 $1/; + $self->{itemsList}[ $self->{itemIdx} ]->{actors} .= $origtext; + } + } + } + else + { + $origtext =~ s/ : //g if !$self->{insideSynopsis}; + if ($self->{insideRating}) + { + $origtext =~ s{(\d+),(\d+)/10}{$1.$2}; + $self->{curInfo}->{ratingpress} = int ($origtext + 0.5); + $self->{insideRating} = 0; + } + elsif ($self->{insideGenre}) + { + $origtext =~ s/ - /,/g; + # don't scream! Convert all caps to first cap only. + $self->{curInfo}->{genre} .= ucfirst(lc($origtext)); + $self->{insideGenre} = 0; + } + elsif ($self->{insideDate}) + { + $self->{curInfo}->{date} = $origtext; + $self->{insideDate} = 0; + } + elsif ($self->{insideDirector}) + { + if (!$self->{curInfo}->{director}) + { + my @directors = split(/\s+-\s+/, $origtext); + for (my $i=0; $i<@directors; $i++) + { + # revert the failed name transposure done my mediadis: + $directors[$i] =~ s/^(.*) (\S+)$/$2 $1/; + } + $self->{curInfo}->{director} = join (', ', @directors); + } + $self->{insideDirector} = 0; + } + elsif ($self->{insideSynopsis}) + { + $self->{curInfo}->{synopsis} .= $origtext . "\n\n"; + $self->{insideSynopsis} = 0; + } + elsif ($self->{insideNat}) + { + $self->{curInfo}->{country} = $origtext; + $self->{insideNat} = 0; + } + elsif ($self->{insideTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{insideTime} = 0; + } + elsif ($self->{insideActors}) + { + foreach my $name (split(/\s+-\s+/, $origtext)) + { + # revert the failed name transposure done my mediadis: + # move the first name part back in front. + $name =~ s/^(.*) (\S+)$/$2 $1/; + # and store the actors in a proper list. 
+ push @{$self->{curInfo}->{actors}}, [$name] + if ($self->{actorsCounter} < + $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS); + $self->{actorsCounter}++; + } + $self->{insideActors} = 0; + } + elsif ($self->{insideOrig}) + { + $self->{curInfo}->{original} = $self->capWord($origtext) if !$self->{curInfo}->{original}; + $self->{insideOrig} = 0; + } + elsif (($self->{inside}->{span}) && ($self->{insideName})) + { + $self->{curInfo}->{title} = $self->capWord($origtext) if !$self->{curInfo}->{title}; + } + elsif ($self->{inside}->{strong}) + { + $self->{insideDate} = 1 if $origtext =~ m/Year/; + $self->{insideDirector} = 1 if $origtext =~ m/Director\(s\)/; + $self->{insideGenre} = 1 if $origtext =~ m/Genres/; + $self->{insideOrig} = 1 if $origtext =~ m/Original title/; + $self->{insideTime} = 1 if $origtext =~ m/Duration/; + $self->{insideNat} = 1 if $origtext =~ m/Country/; + $self->{insideActors} = 1 if $origtext =~ m/Actors/ + or $origtext =~ m/Voice of/; + } + if ($self->{inside}->{td}) + { + if ($origtext =~ m/Global rating/) + { + $self->{insideRating} = 1; + } + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 0, # hide the date as it is wrong most of the time + director => 1, + actors => 1 + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $html =~ s|<a (class="underline" )?href="http://www\.mediadis\.com/products/search\.asp\?par=[0-9]*" title="Filmography">([^<]*)</a>|$2|g; + $html =~ s/ / /g; + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.mediadis.com/video/search.asp?t=19&pl=all&kw=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url unless $url eq ''; + return 'http://www.mediadis.com/video/'; + } + + sub getName 
+ { + return 'Mediadis'; + } + + sub getAuthor + { + return 'Tian'; + } + + sub getLang + { + return 'EN'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCMetropoliES.pm b/lib/gcstar/GCPlugins/GCfilms/GCMetropoliES.pm new file mode 100644 index 0000000..f628a33 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCMetropoliES.pm @@ -0,0 +1,382 @@ +package GCPlugins::GCfilms::GCMetropoliES; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginMetropoliES; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + # preProcess + # Called before each page is processed. You can use it to do some substitutions. + # $html is the page content. + # Returns modified version of page content. 
sub preProcess
{
    my ($self, $html) = @_;

    # Search results page: keep only the part of the HTML we care about.
    # With the /s modifier the whole document is handled as one string and
    # is replaced by the captured results table.
    $html =~ s/^.*(<table width="100%" border="0" cellspacing="0" cellpadding="5">.*<\/td>\n <\/tr>\n<\/table>)\n\n\n.*$/$1/gs;

    # Item page: same trimming; here the table of interest is directly
    # followed by another <table>.
    $html =~ s/^.*(<table width="100%" border="0" cellspacing="0" cellpadding="5">.*<\/td>\n <\/tr>\n<\/table>)\n<table.*$/$1/gs;
    return $html;
}

# text
# Called each time some plain text (between tags) is processed.
# $origtext is the read text.
# Texts shorter than two characters are ignored.
sub text
{
    my ($self, $origtext) = @_;

    return if length($origtext) < 2;

    if ($self->{parsingList})
    {
        # Search results. The column flags are small integers (0 = idle,
        # 1 = next cell, 2 = read the coming text), so they are compared
        # numerically. Each branch stores one column and arms the next:
        # date -> title -> original title -> director.
        if ($self->{isDate} == 2)
        {
            $self->{isDate} = 0;
            $self->{itemsList}[ $self->{itemIdx} ]->{date} = $origtext;
            $self->{isTitle} = 1;
        }
        elsif ($self->{isTitle} == 2)
        {
            $self->{isTitle} = 0;
            $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
            $self->{isOrigTit} = 1;
        }
        elsif ($self->{isOrigTit} == 2)
        {
            # The original title is skipped; only the state advances.
            $self->{isOrigTit}  = 0;
            $self->{isDirector} = 1;
        }
        elsif ($self->{isDirector} == 2)
        {
            $self->{isDirector} = 0;
            $self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext;

            # Last column of the row. This used to clear the misspelled key
            # "insedeInfos", which left insideInfos set.
            $self->{insideInfos} = 0;
        }
        return;
    }

    # Item page.
    $origtext =~ s/\s{2,}//g;

    #$origtext =~ s/\n//g if !$self->{insideSynopsis};
    if ($self->{insideName})
    {
        # Heading of the form "Title (YYYY)".
        if ($origtext =~ /([^\(]*) \(([0-9]{4})\)/)
        {
            $self->{curInfo}->{title} = $1;
            $self->{curInfo}->{date}  = $2;
        }
        $self->{insideName} = 0;
    }
    if ($self->{inside}->{td})
    {
        # Cell of the form "<original title>, <country>, <n> Min."
        if ($origtext =~ /(.*), (.*), (.*) Min\./)
        {
            $self->{curInfo}->{original} = $1;
            $self->{curInfo}->{country}  = $2;
            $self->{curInfo}->{time}     = $3;
        }
        elsif ($self->{insideActors})
        {
            # insideActors is a countdown: the cast label sets it to 2 and
            # the text seen when it reaches 0 is stored as the actors.
            $self->{insideActors}--;
            if ($self->{insideActors} == 0)
            {
                $self->{curInfo}->{actors} = $origtext;
            }
        }
    }
    if ($self->{insideDirector})
    {
        $self->{insideDirector} = 0;
        $self->{curInfo}->{director} = $origtext;
    }

    if ($self->{inside}->{span})
    {
        # The "." stands in for the accented letter of the label.
        if ($origtext =~ /Int.rpretes:/)
        {
            $self->{insideActors} = 2;
        }
    }
    if ($self->{insideSynopsis})
    {
        $self->{curInfo}->{synopsis} = $origtext;
        $self->{insideSynopsis} = 0;
        $self->{insideInfos} = 0;
    }
}

# end
# Called each time a HTML tag ends.
# $tagname is the tag name.
sub end
{
    my ($self, $tagname) = @_;
    $self->{inside}->{$tagname}--;

    # Code for processing the search results
    #if ($self->{parsingList}){
    #}
    # Code for processing the selected movie's information
    #else {
    #}
}

# In processing functions below, $self->{parsingList} can be used.
# If true, we are processing a search results page.
# If false, we are processing an item information page.

# $self->{inside}->{tagname} (with correct value for tagname) can be used to test
# if we are in the corresponding tag.

# You have a counter $self->{itemIdx} that has to be used when processing search results.
# It is your responsibility to increment it!

# When processing search results, you have to fill the available fields for results
#
#  $self->{itemsList}[$self->{itemIdx}]->{field_name}
#
# When processing a movie page, you need to fill the fields (if available)
# in $self->{curInfo}.
#
#  $self->{curInfo}->{field_name}

# start
# Called each time a new HTML tag begins.
# $tagname is the tag name.
# $attr is reference to an associative array of tag attributes.
# $attrseq is an array reference containing all the attributes name.
+ # $origtext is the tag text as found in source file + # Returns nothing + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + + # Código para procesar el resultado de la busqueda para generar el listado + if ($self->{parsingList}) + { + # Comprobamos si estamos dentro de un tr con la info de un titulo + if (($tagname eq "tr") && (($attr->{bgcolor} eq "#ECF5FF") || ($attr->{bgcolor} eq "#FFFFFF"))) + { + $self->{insideInfos} = 1; + # Lo primero a leer es la fecha. Indicamos que es el siguiente a procesar + $self->{isDate} = 1; + $self->{isTitle} = 0; + $self->{isOrigTit} = 0; + $self->{isDirector} = 0; + # Aumentamos el número de resultados encontrados + $self->{itemIdx}++; + return; + } + + # Comprobamos que campo de la información estamos pocesando + if ($tagname eq "td" && $self->{insideInfos}) + { + $self->{isDate} = 2 if $self->{isDate} eq 1; + $self->{isOrigTit} = 2 if $self->{isOrigTit} eq 1; + $self->{isDirector} = 2 if $self->{isDirector} eq 1; + } + if ($tagname eq "a" && $self->{isTitle}) + { + $self->{isTitle} = 2; + # Guardamos la Url del enlace + my $url = $attr->{href}; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + } + } + # Código para procesar la información de la pelicula seleccionada + else + { + # Si estamos dentro de una imagen y no se ha asignado ninguna, la asignamos + if (($tagname eq "img") & !$self->{curInfo}->{image}) + { +# Imágenes en cmg: +# Thumb http://carteles.metropoliglobal.com/galerias/data/1149/1563-2008-rastrooculto-espanol-210459-thumb.jpg +# Normal: http://carteles.metropoliglobal.com/galerias/data/1149/1563-2008-rastrooculto-espanol-210459.jpg +# Extraemos la dirección de la imagen a partir del thumb + if ($attr->{src} =~ /\.\.\/(galerias\/data\/[0-9]*\/.*)-thumb\.jpg/) + { + $self->{curInfo}->{image} = "http://carteles.metropoliglobal.com/" . $1 . 
".jpg"; + } + } + + # Comprobamos el rating + if ($tagname eq "img") + { + # En cmg la puntuación está asignada con una imagen con el formato ratingX.gif donde + # X está entre 0 y 5 + if ($attr->{src} =~ /imagenes\/rating([0-5])\.gif/) + { + $self->{curInfo}->{ratingpress} = ($1 / 5) * 10; + } + } + elsif ($tagname eq "span") + { + $self->{insideName} = 1 if $attr->{class} eq "title"; + $self->{insideInfos} = 1 if $attr->{class} eq "title"; + } + elsif ($tagname eq "td") + { + $self->{insideDirector} = 1 if $attr->{width} eq "84%"; + if ($self->{insideInfos}) + { + $self->{insideSynopsis} = 1 if $attr->{colspan} eq "2"; + } + } + } + } + + # changeUrl + # Can be used to change URL if item URL and the one used to + # extract information are different. + # Return the modified URL. + #sub changeUrl + #{ + # my ($self, $url) = @_; + # return $url; + #} + + # getExtra + # Used if the plugin wants an extra column to be displayed in search results + # Return the column title or empty string to hide the column. + #sub getExtra + #{ + # return 'Extra'; + #} + + # getLang + # Used to fill in plugin list with user language plugins + # Return the language used for this site (2 letters code). + sub getLang + { + return "ES"; + } + + # getAuthor + # Used to display the plugin author in GUI. + # Returns the plugin author name. + sub getAuthor + { + return "DoVerMan"; + } + + # getName + # Used to display plugin name in GUI. + # Returns the plugin name. + sub getName + { + return 'CartelesMetropoliGlobal'; + } + + # getCharset + # Used to convert charset in web pages. + # Returns the charset as specified in pages. + sub getCharset + { + my $self = shift; + # Charset de la web + return "iso-8859-1"; + } + + # getItemUrl + # Used to get the full URL of an item page. + # Useful when url on results pages are relative. + # $url is the URL as found with a search. + # Returns the absolute URL. + sub getItemUrl + { + my ($self, $url) = @_; + # url contendrá ficha.php?...... 
+ + return "http://carteles.metropoliglobal.com/paginas/$url"; + } + + # getSearchUrl + # Used to get the URL that to be used to perform searches. + # $word is the query + # Returns the full URL. + sub getSearchUrl + { + my ($self, $word) = @_; + return "http://carteles.metropoliglobal.com/paginas/ficha.php" + . "?qbtitulo=$word&qbbuscar=titulo&Submit=Buscar&qsec=buscar"; + } + + # Constructor + sub new + { + # Inicialización + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + # Campos que devuelve el plugin (1 si, 0 no). Son los que apareceran + # en el listado de resultados + $self->{hasField} = { + title => 1, + date => 1, + director => 1, + actors => 0, + }; + + # Indica si estamos procesando información útil + $self->{insideInfos} = 0; + + # Indican el estado del procesado del listado de resultados (0 no procesar, 1 es el siguiente, 2 procesando) + $self->{isDate} = 0; + $self->{isTitle} = 0; + $self->{isOrigTit} = 0; + $self->{isDirector} = 0; + + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCMonsieurCinema.pm b/lib/gcstar/GCPlugins/GCfilms/GCMonsieurCinema.pm new file mode 100644 index 0000000..1e989c2 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCMonsieurCinema.pm @@ -0,0 +1,272 @@ +package GCPlugins::GCfilms::GCMonsieurCinema; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +################################### +# # +# Plugin soumis par MeV # +# # +################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + + package GCPlugins::GCfilms::GCPluginMonsieurCinema; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + if ($attr->{href} =~ /^http\:\/\/cinema\.tiscali\.fr\/fichefilm\.aspx/) + { + my $url = $attr->{href}; + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + } + } + } + else + { + if ($tagname eq "img") + { + if ($attr->{src} =~ + m|^http\://media\.monsieurcinema\.com/film/[0-9]*/[0-9]*/[0-9]*\.jpg|) + { + $self->{curInfo}->{image} = $attr->{src}; + } + } + elsif ($tagname eq "b") + { + if ($attr->{class} eq "sous_titre") + { + $self->{insideName} = 1; + } + } + elsif ($tagname eq "span") + { + if ($attr->{class} eq "sous_titre") + { + $self->{insideDate} = 1; + } + } + elsif ($tagname eq "div") + { + if ($attr->{class} eq "movie_infos") + { + $self->{insideInfos} = 1; + } + elsif ($attr->{align} eq "justify") + { + $self->{insideSynopsis} = 1; + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + return if length($origtext) < 2; + + if ($self->{parsingList}) + { + if ($self->{isMovie}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext; + $self->{isMovie} = 0; + $self->{isInfo} = 1; + return; + } + elsif 
($origtext =~ /, de ([^(]*)�\(([0-9]{4})\)/) + { + $self->{itemsList}[ $self->{itemIdx} ]->{"director"} = $1; + $self->{itemsList}[ $self->{itemIdx} ]->{"date"} = $2; + } + } + else + { + $origtext =~ s/\s{2,}//g; + $origtext =~ s/\[endline\]/\n/g if !$self->{insideSynopsis} && !$self->{insideCast}; + + if ($self->{insideName}) + { + $self->{curInfo}->{title} = $self->capWord($origtext); + $self->{insideName} = 0; + } + elsif ($self->{insideDate}) + { + if ($origtext =~ /\(([0-9]{4})\)/) + { + $self->{curInfo}->{date} = $1; + $self->{insideCast} = 1; + } + $self->{insideDate} = 0; + } + elsif ($self->{insideInfos}) + { + if (($origtext =~ /Genre\s*\:\s*(.*)/) || ($origtext =~ /Catégorie\s*\:\s*(.*)/)) + { + $self->{curInfo}->{genre} .= $self->{curInfo}->{genre} ? "," . $1 : $1; + $self->{curInfo}->{genre} =~ s/, /,/g; + } + elsif ($origtext =~ /Durée\s*\:\s*(.*)/) + { + $self->{curInfo}->{time} = $1; + } + elsif ($origtext =~ /Pays\s*\:\s*(.*)/) + { + $self->{curInfo}->{country} = $1; + } + elsif ($origtext =~ /Public\s*\:\s*(.*)/) + { + if ($1 eq 'Tous publics') + { + $self->{curInfo}->{age} = 2; + } + else + { + $self->{curInfo}->{age} = $1; + $self->{curInfo}->{age} =~ s/.*?([0-9]+).*/$1/; + } + } + $self->{insideInfos} = 0; + } + elsif ($self->{insideSynopsis}) + { + $origtext =~ s/\[endline\]/\n/g; + $self->{curInfo}->{synopsis} = $origtext if !$self->{curInfo}->{synopsis}; + $self->{insideSynopsis} = 0; + } + elsif ($self->{insideCast}) + { + $origtext =~ s/\[endline\]//g; + $origtext =~ s/
| +//g; + if ($origtext =~ /de(.*)avec(.*)/) + { + $self->{curInfo}->{director} = $1; + $self->{curInfo}->{actors} = $2; + } + $self->{insideCast} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 0, + director => 0, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $html =~ s{</?b>}{}g; + $html =~ s/<br>/\[endline\]/gi; + $html =~ s{<span style="text-transform\:uppercase;">([^<]*)</span>} + {$1}g; + $html =~ s{<div style="float\:left;width\:100px">([^<]*)</div>[^<]*<div style="float\:left;">([^<]*)</div>} + {<div class="movie_infos">$1 \: $2</div>}g; + $html =~ s{<a href="http\://cinema\.tiscali\.fr/recherche\.aspx\?file=http&keys=[^"]*">([^<]*)</a>} + {$1}g; + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://cinema.tiscali.fr/recherche.aspx?file=http&keys=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url unless $url eq ''; + return "http://cinema.tiscali.fr/"; + } + + sub getName + { + return "MonsieurCinema.com"; + } + + sub getAuthor + { + return 'MeV'; + } + + sub getLang + { + return 'FR'; + } + + sub getCharset + { + return "utf8"; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCMovieMeter.pm b/lib/gcstar/GCPlugins/GCfilms/GCMovieMeter.pm new file mode 100644 index 0000000..f325817 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCMovieMeter.pm @@ -0,0 +1,429 @@ +package GCPlugins::GCfilms::GCMovieMeter; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# Copyright 2007 Petr Gajdusek (Pajdus) <gajdusek.petr@centrum.cz> +# Copyright 2007 Mattias de Hollander (MaTiZ) <mdehollander@gmail.com> +# +# This file is part of GCstar. 
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    # Replace SiteTemplate with your exporter name
    # It must be the same name as the one used for file and main package name
    package GCPlugins::GCfilms::GCPluginMovieMeter;

    use HTTP::Cookies;
    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # getSearchUrl
    # Used to get the URL to be used to perform searches.
    # $word is the query.
    # Submits the query to the site with a POST through $self->{ua} and
    # returns the fixed URL of the results page.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        # The response of the POST is deliberately not inspected.
        # NOTE(review): presumably the site remembers the query for the
        # session (cookies) and serves it from the results URL - confirm.
        $self->{ua}->post("http://www.moviemeter.nl/film/search", [ 'search[title]' => $word ]);

        return "http://www.moviemeter.nl/film/searchresults/";
    }

    # getItemUrl
    # Used to get the full URL of a movie page.
    # $url is the URL as found with a search.
    # Returns $url unchanged when it is a true value, the site root otherwise.
    sub getItemUrl
    {
        my ($self, $url) = @_;
        return $url if $url;
        return 'http://www.moviemeter.nl';
    }

    # getCharset
    # Used to convert charset in web pages.
    # Returns the charset as specified in pages.
+ sub getCharset + { + my $self = shift; + + #return "WINDOWS-1250"; + return "ISO-8859-1"; + } + + # getName + # Used to display plugin name in GUI. + # Returns the plugin name. + sub getName + { + return "MovieMeter.nl"; + } + + # getAuthor + # Used to display the plugin author in GUI. + # Returns the plugin author name. + sub getAuthor + { + return 'MaTiZ'; + } + + # getLang + # Used to fill in plugin list with user language plugins + # Return the language used for this site (2 letters code). + sub getLang + { + return 'NL'; + } + + # hasSearchYear + # Used to hide year column in search results + # Return 0 to hide column, 1 to show it. + sub hasSearchYear + { + return 1; + } + + # hasSearchDirector + # Used to hide director column in search results + # Return 0 to hide column, 1 to show it. + sub hasSearchDirector + { + return 0; + } + + # hasSearchActors + # Used to hide actors column in search results + # Return 0 to hide column, 1 to show it. + sub hasSearchActors + { + return 0; + } + + # getExtra + # Used if the plugin wants an extra column to be displayed in search results + # Return the column title or empty string to hide the column. + sub getExtra + { + return 'Original Title'; + #return ''; + } + + # changeUrl + # Can be used to change URL if movie URL and the one used to + # extract information are different. + # Return the modified URL. + sub changeUrl + { + my ($self, $url) = @_; + + return $url; + } + + # start + # Called each time a new HTML tag begins. + # $tagname is the tag name. + # $attr is reference to an associative array of tag attributes. + # $attrseq is an array reference containing all the attributes name. 
+ # $origtext is the tag text as found in source file + # Returns nothing + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + if ($self->{parsingList}) + { + + # Your code for processing search results here + if ($tagname eq "a") + { + if ($attr->{href} =~ m/\/film\/[0-9]+/) + { + my $url = $attr->{href}; + $self->{isMovie} = 1; + $self->{isInfo} = 1; + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url; + } + } + elsif ($tagname eq "div") + { + if ($attr->{class} =~ /filmresults/) + { + $self->{isYear} = 1; + } + } + elsif ($tagname eq "span") + { + if ($attr->{class} =~ /subtext/) + { + $self->{altTitle} = 1; + } + } + } + else + { + # Your code for processing movie information here + if ($tagname eq "h1") + { + $self->{insideName} = 1; + } + elsif ($tagname eq "img") + { + if ($attr->{class} eq "poster") + { + $self->{curInfo}->{image} = $attr->{src}; + } + } + elsif ($tagname eq "a") + { + if ($self->{insideFilmInfo}) + { + if ($attr->{href} =~ /director/) + { + $self->{insideFilmDir} = 1; + $self->{filminfo_dir} += 1; + } + } + } + elsif ($tagname eq "div") + { + if ($attr->{id} eq "film_info") + { + $self->{insideFilmInfo} = 1; + $self->{filminfo_id} = 0; + } + elsif ($attr->{id} eq "beslistresults") + { + $self->{insideFilmInfo} = 0; + } + elsif ($attr->{id} eq "film_votes") + { + $self->{insideRating} = 1; + } + + } + } + } + + # end + # Called each time a HTML tag ends. + # $tagname is the tag name. 
+ sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + + if ($self->{parsingList}) + { + # Your code for processing search results here + + } + else + { + # Your code for processing movie information here + + # optional + if ($tagname eq "div") + { + if ($self->{insideRating}) + { + $self->{insideRating} = 0; + } + } + elsif ($tagname eq "a") + { + if ($self->{insideFilmDir}) + { + $self->{insideFilmDirOUT} = 1; + $self->{insideFilmDir} = 0; + } + } + } + } + + # text + # Called each time some plain text (between tags) is processed. + # $origtext is the read text. + sub text + { + my ($self, $origtext) = @_; + return if length($origtext) < 2; + + if ($self->{parsingList}) + { + # Your code for processing search results here + if ($self->{isMovie}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext; + $self->{isMovie} = 0; + $self->{isInfo} = 1; + return; + } + elsif ($self->{isYear}) + { + # Remove brackets from year: from (2002) to 2002 + $origtext =~ s/(\)|\()//g; + # Remove leading or trailing whitespace + $origtext =~ s/^\s+|\s+$//g; + $self->{itemsList}[ $self->{itemIdx} ]->{"date"} = $origtext; + $self->{isYear} = 0; + } + elsif ($self->{altTitle}) + { + $origtext =~ /Alternatieve titel:\s(.*)/; + $self->{itemsList}[ $self->{itemIdx} ]->{"extra"} = + $self->{itemsList}[ $self->{itemIdx} ]->{"title"}; + $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $1; + $self->{altTitle} = 0; + } + + } + else + { + # Your code for processing movie information here + if ($self->{insideName}) + { + # First try to use the search results information, otherwise + # parse the movie information + my $title = $self->{itemsList}[ $self->{wantedIdx} ]->{"title"}; + if ($title) + { + $self->{curInfo}->{title} = $title; + $self->{curInfo}->{date} = $self->{itemsList}[ $self->{wantedIdx} ]->{"date"}; + $self->{curInfo}->{original} = + $self->{itemsList}[ $self->{wantedIdx} ]->{"extra"}; + } + + else + { + # Split Little Miss Sunshine (2006) 
into title and year + my ($title, $year) = ($origtext =~ /(\D+)\s\((\d+)\)/); + $self->{curInfo}->{title} = $title; + $self->{curInfo}->{date} = $year; + } + $self->{insideName} = 0; + } + elsif ($self->{insideFilmInfo}) + { + $self->{filminfo_id} += 1; + # Country Genre Time + if ($self->{filminfo_id} == 2) + { + my @parts = split("\n", $origtext); + $self->{curInfo}->{country} = $parts[0]; + my $genre = $parts[1]; + $genre =~ s/\s\/\s/,/; + $self->{curInfo}->{genre} = $genre; + my $time = $parts[2]; + $time =~ s/\sminuten//; + $self->{curInfo}->{time} = $time; + } + # Director + elsif ($self->{insideFilmDir}) + { + if (exists $self->{curInfo}->{director}) + { + $self->{curInfo}->{director} = + $self->{curInfo}->{director} . ", " . $origtext; + } + else + { + $self->{curInfo}->{director} = $origtext; + + } + } + if ($origtext =~ s/\nmet\s//) + { + my @parts = split("\n\n", $origtext); + $self->{curInfo}->{synopsis} = $parts[1]; + $parts[0] =~ s/ en /, /; + foreach my $actor (split("\s*,\s*", $parts[0])) + { + push @{$self->{curInfo}->{actors}}, [$actor] + if $self->{actorsCounter} < + $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS; + $self->{actorsCounter}++; + } + } + } + elsif ($self->{insideRating}) + { + # Use a dot instead of a comma as decimal seperator + $origtext =~ s/,/./; + # Scale rating to a maximum of 10 + # and round to integer + $self->{curInfo}->{ratingpress} = int($origtext * 2 + 0.5); + } + } + } + + # new + # Constructor. + # Returns object reference. + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + + $self->{ua}->cookie_jar(HTTP::Cookies->new); + + # Do your init stuff here + bless($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + + return $self; + } + + # preProcess + # Called before each page is processed. You can use it to do some substitutions. + # $html is the page content. 
+ # Returns modified version of page content. + sub preProcess + { + my ($self, $html) = @_; + + # replace <BR> and <P> tags with \n (also, </BR>,</P>, <P/>, <BR/> ) + $html =~ s/\<(\/)?(BR|P)(\s*\/)?\>/\n/mgi; + + return $html; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCMoviecovers.pm b/lib/gcstar/GCPlugins/GCfilms/GCMoviecovers.pm new file mode 100644 index 0000000..f1a5e6c --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCMoviecovers.pm @@ -0,0 +1,246 @@ +package GCPlugins::GCfilms::GCMoviecovers; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Plugin for MovieCovers.com (French site).
    package GCPlugins::GCfilms::GCPluginMoviecovers;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Opening-tag handler.
    # On a results page it opens a new entry for each film link found
    # inside a <li>; on a film page it derives the cover URL from the
    # first image whose alt text starts with "Recto".
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            return unless $tagname eq "a";

            if (($attr->{href} =~ /^\/film\/titre_/) && ($self->{inside}->{li}))
            {
                $self->{isMovie} = 1;
                $self->{isInfo}  = 0;
                my $idx = ++$self->{itemIdx};
                $self->{itemsList}[$idx]->{url} = $attr->{href};
            }
            return;
        }

        return unless $tagname eq "img";

        my $source = $attr->{src};
        my $label  = $attr->{alt};

        # Keep the first cover found.
        return if $self->{curInfo}->{image};

        if ($label =~ /^Recto/)
        {
            # Reduce the thumbnail URL to its file name and point it at
            # the zipcache directory instead.
            $source =~ s/http\:\/\/www\.moviecovers\.com\/DATA\/thumbs\/films\-[A-Za-z0-9-]+\/(.*)/$1/;
            $self->{curInfo}->{image} =
              "http://data.moviecovers.com/DATA/zipcache/" . $source;
        }
        return;
    }

    # end
    # Closing-tag handler: only maintains the tag counters.
    sub end
    {
        my $self    = shift;
        my $tagname = shift;

        $self->{inside}->{$tagname}--;
    }

    # text
    # Text handler.
    # Results page: stores title and year of the current entry.
    # Film page: a <th> label arms a flag, the following <td> text fills
    # the matching field and clears the flag.
    sub text
    {
        my ($self, $origtext) = @_;

        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext;
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
                return;
            }

            # " (1999)" right after the title link is the release year.
            if ($self->{inside}->{li} && $origtext =~ /^ \([0-9]{4}\)/)
            {
                $origtext =~ s/ \(([0-9]{4})\)/$1/;
                $self->{isInfo} = 0;
                $self->{itemsList}[ $self->{itemIdx} ]->{"date"} = $origtext;
            }
            return;
        }

        my $info = $self->{curInfo};

        if ($self->{inside}->{title})
        {
            $info->{title} = $origtext if length($origtext) > 2;
        }

        if ($self->{inside}->{td})
        {
            my $inLink = $self->{inside}->{a};

            if ($self->{insideOriginal})
            {
                $origtext =~ s/^\s+//;
                $origtext =~ s/\s+$//;
                $info->{original} = $origtext;
                $self->{insideOriginal} = 0;
            }
            elsif ($self->{insideGenre} && $inLink)
            {
                $info->{genre} = $origtext;
                $self->{insideGenre} = 0;
            }
            elsif ($self->{insideDirector} && $inLink)
            {
                $info->{director} = $origtext;
                $self->{insideDirector} = 0;
            }
            elsif ($self->{insideNat} && $inLink)
            {
                $info->{country} = $origtext;
                $self->{insideNat} = 0;
            }
            elsif ($self->{insideTime})
            {
                $origtext =~ s/^\s+//;
                $origtext =~ s/\s+$//;
                $info->{time} = $origtext;
                $self->{insideTime} = 0;
            }
            elsif ($self->{insideDate} && $inLink)
            {
                $info->{date} = $origtext;
                $self->{insideDate} = 0;
            }
            elsif ($self->{insideActors} && $inLink)
            {
                # Names are accumulated with a trailing ", "; the counter
                # keeps running past the limit.
                if ($self->{actorsCounter} <
                    $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS)
                {
                    $info->{actors} .= $origtext . ', ';
                }
                $self->{actorsCounter}++;
            }
            elsif ($self->{insideSynopsis})
            {
                $info->{synopsis} .= $origtext;
                $info->{synopsis} =~ s/^\s*//;
            }
        }

        if ($self->{inside}->{th})
        {
            # Row label => flag telling which field the next cell holds.
            # The synopsis label is deliberately left out, as in the
            # original: [ insideSynopsis => qr/Résumé/ ]
            my @labels = (
                [ insideDirector => qr/Réalisateur/ ],
                [ insideActors   => qr/Acteurs principaux/ ],
                [ insideGenre    => qr/Genre/ ],
                [ insideTime     => qr/Durée/ ],
                [ insideNat      => qr/Nationalité/ ],
                [ insideDate     => qr/Année/ ],
                [ insideOriginal => qr/Titre original/ ],
            );
            foreach my $pair (@labels)
            {
                my ($flag, $pattern) = @{$pair};
                $self->{$flag} = 1 if $origtext =~ $pattern;
            }
        }
    }

    # new
    # Constructor. Declares the columns available in search results and
    # resets the parser state.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 1,
            actors   => 1,
        };

        @{$self}{qw(isInfo isMovie)}   = (0, 0);
        @{$self}{qw(curName curUrl)} = (undef, undef);

        bless($self, $class);
        return $self;
    }

    # preProcess
    # Pages are parsed as delivered.
    sub preProcess
    {
        my $self = shift;
        my $html = shift;

        return $html;
    }

    # getSearchUrl
    # $word is the query. Returns the search URL.
    sub getSearchUrl
    {
        my $self = shift;
        my $word = shift;

        return "http://www.moviecovers.com/multicrit.html?titre=$word";
    }

    # getItemUrl
    # Turns the site-relative $url of a result into an absolute one.
    sub getItemUrl
    {
        my $self = shift;
        my $url  = shift;

        return "http://www.moviecovers.com" . $url;
    }

    sub getName
    {
        return 'MovieCovers.com';
    }

    sub getAuthor
    {
        return "Patrick Fratczak";
    }

    sub getLang
    {
        return "FR";
    }

    sub getCharset
    {
        return 'ISO-8859-1';
    }
}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCNasheKino.pm b/lib/gcstar/GCPlugins/GCfilms/GCNasheKino.pm
new file mode 100644
index 0000000..9e093c7
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCNasheKino.pm
@@ -0,0 +1,222 @@
package GCPlugins::GCfilms::GCNasheKino;

###################################################
#
# Copyright 2005-2009 zserghei
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;
use Encode qw(encode);

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Plugin for nashekino.ru (Russian site).
    package GCPlugins::GCfilms::GCPluginNasheKino;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Opening-tag handler. Only <a class="ab10"> links matter: on a
    # results page they open a new entry, on a film page (inside <h1>)
    # they announce the year.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
        $self->{inside}->{$tagname}++;
        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                my $url = $attr->{href};
                if ($attr->{class} eq "ab10" && $url =~ m/\/data.movies\?id/)
                {
                    $self->{isMovie} = 1;
                    $self->{isDate}  = 2;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
            }

            # NOTE(review): this also fires for the very link that set
            # isDate to 2 just above, so the value 2 never survives this
            # handler. Kept as is; confirm whether that is intended.
            if (   $tagname eq "a"
                && $attr->{class} eq "ab10"
                && $self->{isDate} == 2)
            {
                $self->{isDate} = 1;
            }
        }
        else
        {
            if (   $tagname eq "a"
                && $attr->{class} eq "ab10"
                && $self->{inside}->{h1})
            {
                $self->{insideDate} = 1;
            }
        }
    }

    # text
    # Text handler.
    # Results page: title, then the year taken from "<digits> г".
    # Film page: the text is decoded and trimmed; a label text arms a
    # flag and the following text chunk(s) fill the matching field.
    sub text
    {
        my ($self, $origtext) = @_;
        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                $self->{isMovie} = 0;
                return;
            }
            elsif ($self->{isDate} == 1)
            {
                if ($origtext =~ m/([0-9]+)\sг/)
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
                    $self->{isDate} = 0;
                }
            }
        }
        else
        {
            utf8::decode($origtext);
            $origtext =~ s/^\s+//;
            $origtext =~ s/\s+$//;
            if ($self->{inside}->{h1})
            {
                $self->{curInfo}->{title} = $origtext
                  if !$self->{curInfo}->{title};
            }
            if ($self->{insideDate})
            {
                if ($origtext =~ m/([0-9]+)\sг/)
                {
                    $self->{curInfo}->{date} = $1;

                    # Country is derived from the year alone.
                    if ($self->{curInfo}->{date} < 1992)
                    {
                        $self->{curInfo}->{country} = "СССР";
                    }
                    else
                    {
                        $self->{curInfo}->{country} = "Россия";
                    }
                    $self->{curInfo}->{audio} = "русский";
                    $self->{insideDate} = 0;
                }
            }
            if ($self->{insideDirector})
            {
                $self->{curInfo}->{director} = $origtext;
                $self->{insideDirector} = 0;
            }
            elsif ($self->{insideSynopsis})
            {
                if ($origtext =~ m/\S+/)
                {
                    $self->{curInfo}->{synopsis} = $origtext;
                    $self->{insideSynopsis} = 0;
                }
            }
            elsif ($self->{insideActors})
            {
                if ($origtext =~ m/Сценарий:/)
                {
                    # The label of the next section closes the cast list;
                    # it is not a name and must not be stored as one.
                    $self->{insideActors} = 0;
                }
                elsif ($origtext ne ''
                    && $origtext !~ m/^,/
                    && $self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS)
                {
                    # Chunks that are empty after trimming or start with a
                    # comma are separators between names.
                    $self->{curInfo}->{actors} .= (
                        $self->{curInfo}->{actors}
                        ? ", " . $origtext
                        : $origtext
                    );
                    $self->{actorsCounter}++;
                }
            }

            # Labels are checked last so that they take effect from the
            # next text chunk on.
            $self->{insideDirector} = 1 if $origtext =~ m/Режиссер\(ы\):/;
            $self->{insideActors}   = 1 if $origtext =~ m/Актер\(ы\):/;
            $self->{insideSynopsis} = 1 if $origtext =~ m/О\sфильме:/;
        }
    }

    # end
    # Closing-tag handler: only maintains the tag counters.
    sub end
    {
        my ($self, $tagname) = @_;
        $self->{inside}->{$tagname}--;
    }

    # new
    # Constructor. Declares the columns available in search results and
    # resets the parser state.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    sub getName
    {
        return "NasheKino";
    }

    sub getAuthor
    {
        return 'zserghei';
    }

    sub getLang
    {
        return 'RU';
    }

    sub getCharset
    {
        my $self = shift;
        return "Windows-1251";
    }

    # getSearchUrl
    # $word is the query. Returns the search URL.
    sub getSearchUrl
    {
        my ($self, $word) = @_;
        return "http://www.nashekino.ru/data.find?t=0&yr=&sval=$word";
    }

    # getItemUrl
    # Turns the site-relative $url of a result into an absolute one.
    sub getItemUrl
    {
        my ($self, $url) = @_;
        return "http://www.nashekino.ru/" . $url;
    }

    # preProcess
    # Clears the parsingEnded flag; the page itself is returned unchanged.
    sub preProcess
    {
        my ($self, $html) = @_;
        $self->{parsingEnded} = 0;
        return $html;
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCOFDb.pm b/lib/gcstar/GCPlugins/GCfilms/GCOFDb.pm
new file mode 100644
index 0000000..511ec4e
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCOFDb.pm
@@ -0,0 +1,304 @@
package GCPlugins::GCfilms::GCOFDb;

###################################################
#
# Copyright 2005-2010 Tian
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Plugin for OFDb.de (German site).
    package GCPlugins::GCfilms::GCPluginOFDb;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Opening-tag handler.
    # Results page: film links open a new entry, a following <font> tag
    # announces the original title.
    # Film page: <font>, <img>, <a> and <div class="synopsis"> arm the
    # flags consumed by text().
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                if (   ($attr->{href} =~ m/view\.php\?page=film&fid=[0-9]*/)
                    || ($attr->{href} =~ m|^film/[0-9]*|))
                {
                    $self->{isTitle}    = 1;
                    $self->{isInfo}     = 0;
                    $self->{isOriginal} = 0;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href};
                }
            }
            elsif ($tagname eq "font")
            {
                if ($self->{isInfo})
                {
                    $self->{isOriginal} = 1;
                }
            }
        }
        else
        {
            if ($tagname eq "font")
            {
                if ($attr->{face} eq "Arial,Helvetica,sans-serif")
                {
                    if ($attr->{size} eq "3")
                    {
                        $self->{insideName} = 1;
                    }
                    elsif ($attr->{size} eq "2")
                    {
                        $self->{insideInfosNames} = 1 if $attr->{class} eq "Normal";
                        $self->{insideInfos}      = 1 if $attr->{class} eq "Daten";
                    }
                }
            }
            elsif ($tagname eq "img")
            {
                if ($attr->{src} =~ m|img\.ofdb\.de/film/[0-9]+/[0-9]*.jpg|)
                {
                    # Keep the first cover found.
                    $self->{curInfo}->{image} = $attr->{src}
                      if !$self->{curInfo}->{image};
                }
                elsif ($attr->{src} eq "images/design3/notenspalte.png")
                {
                    # The alt text of the rating image carries the score;
                    # round it to an integer.
                    $self->{curInfo}->{ratingpress} = int( $attr->{alt} + 0.5 )
                      if ! $self->{curInfo}->{ratingpress};
                }
            }
            elsif ($tagname eq "a")
            {
                if ($attr->{href} =~ m/view\.php\?page=blaettern&Kat=Land&Text=(.*)/)
                {
                    $self->{insideCountry} = 1;
                }
                $self->{curInfo}->{date} = $1
                  if ($attr->{href} =~ m/view\.php\?page=blaettern&Kat=Jahr&Text=([0-9]{4})/);
            }
            elsif (($tagname eq "div") && ($attr->{class} eq "synopsis"))
            {
                $self->{insideSynopsis} = 1;
            }
        }
    }

    # end
    # Closing-tag handler. The end of a table row closes whichever info
    # field was being read.
    sub end
    {
        my ($self, $tagname) = @_;

        if ($tagname eq "tr")
        {
            $self->{insideDirector} = 0;
            $self->{insideActors}   = 0;
            $self->{insideGenre}    = 0;
            $self->{insideInfos}    = 0;
        }

        $self->{inside}->{$tagname}--;
    }

    # text
    # Text handler. Chunks shorter than 2 characters are ignored.
    sub text
    {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        if ($self->{parsingList})
        {
            if ($self->{isTitle})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{"title"} = $origtext;
                $self->{isTitle} = 0;
                $self->{isInfo}  = 1;
                return;
            }
            elsif ($self->{isOriginal})
            {
                # Drop the " / " that separates it from the title.
                $origtext =~ s{^\s*/\s*}{};
                $self->{itemsList}[ $self->{itemIdx} ]->{original} = $origtext;
                $self->{isOriginal} = 0;
                return;
            }
            elsif (($self->{isInfo}) && ($origtext =~ m/\((\d{4})\)/))
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
                $self->{isInfo} = 0;
            }
        }
        else
        {
            if ($self->{insideName})
            {
                $self->{curInfo}->{title} = $origtext if !$self->{curInfo}->{title};
                $self->{insideName} = 0;
            }
            elsif ($self->{insideInfosNames})
            {
                # Row label: decides which field the data cell belongs to.
                $self->{insideOrig}     = 1 if $origtext =~ m/Originaltitel:/;
                $self->{insideDirector} = 1 if $origtext =~ m/Regie:/;
                $self->{insideActors}   = 1 if $origtext =~ m/Darsteller:/;
                $self->{insideGenre}    = 1 if $origtext =~ m/Genre\(s\):/;
                $self->{insideInfosNames} = 0;
            }
            elsif ($self->{insideCountry})
            {
                $self->{curInfo}->{country} .= ', ' if $self->{curInfo}->{country};
                $self->{curInfo}->{country} .= $origtext;
                $self->{insideCountry} = 0;
            }
            elsif ($self->{insideInfos} && $self->{inside}->{font})
            {
                if ($self->{insideOrig})
                {
                    $self->{curInfo}->{original} = $origtext;
                    $self->{insideOrig}  = 0;
                    $self->{insideInfos} = 0;
                }
                elsif ($self->{insideDirector})
                {
                    $self->{curInfo}->{director} .=
                      $self->{curInfo}->{director}
                      ? ', ' . $origtext
                      : $origtext;
                }
                elsif ($self->{insideActors})
                {
                    push @{$self->{curInfo}->{actors}}, [$origtext]
                      if $self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS;
                    $self->{actorsCounter}++;
                }
                elsif ($self->{insideGenre})
                {
                    push @{$self->{curInfo}->{genre}}, [$origtext];
                }
            }
            elsif ($self->{insideSynopsis})
            {
                # preProcess() left the address of the full plot page in
                # this text. Both matches are checked: after a failed
                # match $1 keeps its old value, so an unchecked second
                # match would store the URL itself as the synopsis.
                if ($origtext =~ m/(http.*?)(\s|$)/)
                {
                    my $page = $self->loadPage($1, 0, 1);
                    if (defined $page
                        && $page =~ m|<font face="Arial,Helvetica,sans-serif" size="2" class="Blocksatz">.*?</a><br>[^<]*</b>(?:</b>)?<br><br>(.*?)</font>|ms)
                    {
                        $self->{curInfo}->{synopsis} = $1;
                        $self->{curInfo}->{synopsis} =~ s/<br \/>/\n/gi;
                    }
                }
                $self->{insideSynopsis} = 0;
            }
        }
    }

    # new
    # Constructor. Declares the columns available in search results and
    # resets the parser state.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
            original => 1,
        };

        $self->{isInfo}  = 0;
        $self->{isYear}  = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # preProcess
    # Simplifies the markup before parsing: removes mouse-over handlers
    # on results pages, unwraps person and genre links, and rewrites the
    # plot teaser into <div class="synopsis"> holding the teaser text and
    # the absolute URL of the full plot page.
    sub preProcess
    {
        my ($self, $html) = @_;

        if ($self->{parsingList})
        {
            $html =~ s|onmouseover="[^"]*"||gms;
        }
        $html =~ s{<a href="view\.php\?page=liste&Name=[^"]*">([^<]*)</a>}
                  {$1}g;
        $html =~ s{<a href="view\.php\?page=genre&Genre=[^"]*">([^<]*)</a>}
                  {$1}g;
        $html =~ s{<font face="Arial,Helvetica,sans-serif" size="2" class="Blocksatz"><p class="Blocksatz"><b>Inhalt:<\/b>\s?([^<]*)<a href="(view\.php\?page=inhalt&fid=[0-9]*&sid=[0-9]*)">\s?<b>\[mehr\]</b></a></p></font>}
                  {<div class="synopsis">$1\nhttp://www.ofdb.de/$2</div>};
        $html =~ s{<font face="Arial,Helvetica,sans-serif" size="2" class="Blocksatz"><p\s*class="Blocksatz"><b>Inhalt:</b>\s?([^<]*)<a href="(plot/[0-9]*[^"]*)">\s?<b>\[mehr\]</b></a></p></font>}
                  {<div class="synopsis">$1\nhttp://www.ofdb.de/$2</div>}gm;
        $html =~ s{%DF}{ss};

        return $html;
    }

    # getSearchUrl
    # $word is the query. Returns the search URL.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        # if $word looks like an EAN, do a EAN search, otherwise title search
        my $kat = ($word =~ /^[\dX]{8}[\dX]*$/) ? "EAN" : "Titel";

        return "http://www.ofdb.de/view.php?page=suchergebnis&Kat=$kat&SText=$word";
    }

    # getItemUrl
    # Turns the site-relative $url of a result into an absolute one.
    sub getItemUrl
    {
        my ($self, $url) = @_;
        utf8::decode($url);
        return 'http://www.ofdb.de/' . $url;
    }

    sub getCharset
    {
        my $self = shift;

        return "ISO-8859-1";
    }

    sub getSearchCharset
    {
        my $self = shift;

        return "UTF-8";
    }

    sub getName
    {
        return "OFDb.de";
    }

    sub getAuthor
    {
        return 'MeV';
    }

    sub getLang
    {
        return 'DE';
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCOdeonHU.pm b/lib/gcstar/GCPlugins/GCfilms/GCOdeonHU.pm
new file mode 100644
index 0000000..0bd78c8
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCOdeonHU.pm
@@ -0,0 +1,305 @@
package GCPlugins::GCfilms::GCOdeonHU;

# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

use strict;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Plugin for odeon.hu (Hungarian site).
    package GCPlugins::GCfilms::GCPluginOdeonHU;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Opening-tag handler.
    # Results page: catalogue links open a new entry.
    # Film page: the class of a <span>/<td> decides which kind of text
    # follows; album images give the cover.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingEnded})
        {
            return;
        }

        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                if ($attr->{href} =~ m:(kat.phtml\?id=.*):)
                {    #?
                    my $url = '/' . $1;
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
            }
        }
        else
        {
            if ($tagname eq "span")
            {
                $self->{insideTitle} = ($attr->{class} eq "ver11 modB colDD0008");
            }
            elsif ($tagname eq "td")
            {
                $self->{insideFieldName}  = ($attr->{class} eq "ver9 col102643");
                $self->{insideFieldValue} = ($attr->{class} eq "ver11 colblack");
                $self->{insidePersonType} = ($attr->{class} eq "ver9 col1D5263 pad5");
                $self->{insideSynopsis}   = ($attr->{class} eq "ver11 col102643 pad2");
                $self->{insideRating}     = ($attr->{class} eq "text_cat_score");

                # Once more than 20 characters of synopsis have been
                # collected, further cells of that class are ignored.
                if ($self->{insideSynopsis}
                    && (length($self->{curInfo}->{synopsis}) > 20))
                {
                    $self->{insideSynopsis} = 0;
                }
            }
            elsif ($tagname eq "img")
            {
                #if (! $self->{curInfo}->{image}) {
                if ($attr->{src} =~ m:img/album/.*\.jpg$:)
                {
                    my $img = 'http://odeon.hu/';
                    $img .= $attr->{src};
                    $self->{curInfo}->{image} = $img;
                }
            }
        }
    }

    # end
    # Closing-tag handler: leaving a link or a table cell clears the
    # flags that start() set for it.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;

        if ($tagname eq "a")
        {
            $self->{isMovie} = 0;
        }
        if ($tagname eq "td")
        {
            $self->{insideFieldName}  = 0;
            $self->{insideFieldValue} = 0;
            $self->{insidePersonType} = 0;
            $self->{insideSynopsis}   = 0;
            $self->{insideRating}     = 0;
        }
    }

    # text
    # Text handler.
    sub text
    {
        my ($self, $origtext) = @_;

        #return if length($origtext) < 2;

        # Character references for the double quote and the superscript
        # three are translated; any other numeric reference is dropped.
        # The first two patterns had been reduced to no-ops that matched
        # the already-decoded character; both numeric and named forms are
        # accepted, and a literal superscript three is still handled.
        $origtext =~ s/&(?:#34|quot);/"/g;
        $origtext =~ s/(?:&(?:#179|sup3);|³)/3/g;
        $origtext =~ s/&#[0-9]*;//g;
        $origtext =~ s/\n//g;

        return if ($self->{parsingEnded});

        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                if ($self->{inside}->{b})
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} .= $origtext;
                    return;
                }
                else
                {
                    # "[Original title, 1999]"
                    if ($origtext =~ m/\[(.*),\s+([0-9]+)\]/)
                    {
                        $self->{itemsList}[ $self->{itemIdx} ]->{date}     = $2;
                        $self->{itemsList}[ $self->{itemIdx} ]->{original} = $1;
                        $self->{isMovie} = 0;
                    }
                }
            }
        }
        else
        {
            if ($self->{insideTitle})
            {
                $self->{curInfo}->{title} = $origtext;
                $self->{insideTitle} = 0;
                return;
            }

            if ($self->{insideFieldName})
            {
                # Map the Hungarian row label to a field name; ".." and
                # "..?" stand in for accented letters. "todo" collects the
                # rows that are not used.
                $self->{FieldName} = "original" if $origtext =~ m/^eredeti/;
                $self->{FieldName} = "date"     if $origtext =~ m/^..?v:/;
                $self->{FieldName} = "country"  if $origtext =~ m/^nemzet:/;
                $self->{FieldName} = "time"     if $origtext =~ m/^hossz:/;
                $self->{FieldName} = "todo"     if $origtext =~ m/^k..?p:/;
                $self->{FieldName} = "todo"     if $origtext =~ m/^kiad/;
                $self->{FieldName} = "todo"     if $origtext =~ m/^dial..?gus:/;
                $self->{FieldName} = "genre"    if $origtext =~ m/^m..?faj:/;

                $self->{insideFieldName} = 0;
                return;
            }

            if ($self->{insideFieldValue})
            {
                my $txt  = $origtext;
                my $name = $self->{FieldName};
                $txt =~ s/^\s*//;
                $txt =~ s/\s*$//;
                $txt =~ s/\s+/ /g;
                $txt =~ s/\s*perc$// if $name eq "time";
                return
                  if $txt =~ m/^\s*$/;

                # Several values for the same field are joined by commas.
                if ($self->{curInfo}->{$name} !~ m/^\s*$/)
                {
                    $self->{curInfo}->{$name} .= "," . $txt;
                }
                else
                {
                    $self->{curInfo}->{$name} = $txt;
                }

                return;
            }

            if ($self->{insidePersonType})
            {
                if ($self->{inside}->{b})
                {
                    # Bold heading: director or cast section.
                    my $name = 0;
                    $name = "director" if $origtext =~ m/^Rendez/;
                    $name = "actors"   if $origtext =~ m/^Szerepl/;
                    if ($name)
                    {
                        $self->{PersonType} = $name;
                    }
                    else
                    {
                        $self->{insidePersonType} = 0;
                    }
                    return;
                }
                elsif ($self->{inside}->{a})
                {
                    # Linked name belonging to the current section.
                    my $name = $self->{PersonType};
                    if ($self->{curInfo}->{$name} !~ m/^\s*$/)
                    {
                        $self->{curInfo}->{$name} .= "," . $origtext;
                    }
                    else
                    {
                        $self->{curInfo}->{$name} = $origtext;
                    }
                    #$self->{curInfo}->{actors} .= $origtext.', '
                    #if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
                    #$self->{actorsCounter}++;
                }

                return;
            }

            if ($self->{insideSynopsis})
            {
                my $txt = $origtext;
                $txt =~ s/\r/\n/g;
                $txt =~ s/^\s+//g;
                $txt =~ s/\s+$//g;
                $self->{curInfo}->{synopsis} .= $txt;
            }
            if ($self->{insideRating})
            {
                $self->{curInfo}->{ratingpress} = int($origtext + 0.5)
                  if $origtext =~ /^[0-9.]+$/;
            }
        }
    }

    # new
    # Constructor. Declares the columns available in search results and
    # resets the parser state.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
            original => 1,
        };

        $self->{isMovie}           = 0;
        $self->{insideDescription} = 0;
        $self->{insideSynopsis}    = 0;
        $self->{curName}           = undef;
        $self->{curUrl}            = undef;

        return $self;
    }

    # preProcess
    # Resets the per-page state and repairs markup before parsing.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;

        # A quote character reference right next to a literal double quote
        # is turned into a single quote. As in text(), the reference had
        # been reduced to a literal quote, which made the second
        # substitution unreachable.
        $html =~ s/"&(?:#34|quot);/'"/g;
        $html =~ s/&(?:#34|quot);"/"'/g;
        $html =~ s|</a></b><br>|</a><br>|;

        if ($self->{parsingList})
        {
            $html =~ s{</?span[^>]*>}{}gi;    # remove all <span> tags
        }

        $self->{insideDescription} = 0;
        $self->{insideSynopsis}    = 0;

        return $html;
    }

    # getSearchUrl
    # $word is the query. Returns the search URL.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://odeon.hu/kat.phtml?".
               "search=$word&scat=5&btn_hirlev.x=13&btn_hirlev.y=5";
    }

    # getItemUrl
    # Turns the site-relative $url of a result into an absolute one.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return "http://www.odeon.hu$url";
    }

    sub getName
    {
        return "odeon.hu";
    }

    sub getAuthor
    {
        return 'Anonymous';
    }

    sub getLang
    {
        return 'HU';
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCOnet.pm b/lib/gcstar/GCPlugins/GCfilms/GCOnet.pm
new file mode 100644
index 0000000..29eef60
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCOnet.pm
@@ -0,0 +1,327 @@
#
# More information here: http://wiki.gcstar.org/en/websites_plugins
#
# GCcollection should be replaced with the kind of collection your
# plugin deals with. e.g. GCfilms, GCgames, GCbooks,...

# Replace SiteTemplate with your plugin name.
# The package name must exactly match the file name (.pm)
# NOTE(review): "GCcollection" looks like an unreplaced template
# placeholder; the other plugins in this directory use
# GCPlugins::GCfilms::GC<Name>. Confirm before renaming.
package GCPlugins::GCcollection::GCOnet;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    # Films plugin for the film.onet.pl site.
    package GCPlugins::GCfilms::GCPluginOnet;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # getSearchUrl
    # Builds the URL of the search results page.
    # $word is the query; it is appended to the URL exactly as received.
    # Returns the full URL.
    sub getSearchUrl
    {
        my $self  = shift;
        my $query = shift;

        return 'http://film.onet.pl/filmoteka.html?S=' . $query;
    }

    # getItemUrl
    # Turns the (relative) URL found on a results page into an
    # absolute one.
    # $url is the URL as found with a search.
    # Returns the absolute URL.
    sub getItemUrl
    {
        my $self     = shift;
        my $relative = shift;

        return 'http://film.onet.pl/' . $relative;
    }

    # getCharset
    # Used to convert charset in web pages.
    # Returns the charset as specified in pages.
    sub getCharset
    {
        return 'ISO-8859-2';
    }

    # getName
    # Returns the plugin name displayed in the GUI.
    sub getName
    {
        return 'Onet';
    }

    # getAuthor
    # Returns the plugin author name displayed in the GUI.
    sub getAuthor
    {
        return 'Marek Cendrowicz';
    }

    # getLang
    # Used to fill in plugin list with user language plugins.
    # Returns the language used for this site (2 letters code).
    sub getLang
    {
        return 'PL';
    }

    # getExtra
    # Used if the plugin wants an extra column to be displayed in search results
    # Return the column title or empty string to hide the column.
+ sub getExtra + { + return ""; + } + + # changeUrl + # Can be used to change URL if item URL and the one used to + # extract information are different. + # Return the modified URL. + sub changeUrl + { + my ($self, $url) = @_; + + return $url; + } + + # In processing functions below, self->{parsingList} can be used. + # If true, we are processing a search results page + # If false, we are processing a item information page. + + # $self->{inside}->{tagname} (with correct value for tagname) can be used to test + # if we are in the corresponding tag. + + # You have a counter $self->{itemIdx} that have to be used when processing search results. + # It is your responsability to increment it! + + # When processing search results, you have to fill the available fields for results + # + # $self->{itemsList}[$self->{movieIdx}]->{field_name} + # + # When processing a movie page, you need to fill the fields (if available) + # in $self->{curInfo}. + # + # $self->{curInfo}->{field_name} + + # start + # Called each time a new HTML tag begins. + # $tagname is the tag name. + # $attr is reference to an associative array of tag attributes. + # $attrseq is an array reference containing all the attributes name. 
+ # $origtext is the tag text as found in source file + # Returns nothing + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($self->{inside}->{list_title} && $tagname eq 'a') + { + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href}; + $self->{listTitle} = 1; + } + } + else + { + if ($attr->{class} eq 'tyw') + { + $self->{itemTitle} = 1; + } + elsif ($tagname eq 'div' && $attr->{class} eq 'a2') + { + $self->{itemDescription} = 1; + } + elsif ($attr->{class} eq 'item_actor') + { + $self->{itemActor} = 1; + } + elsif ($tagname eq 'img' + && $attr->{class} eq 'pic' + && ($attr->{alt} eq 'Galeria' || $attr->{alt} eq 'Plakat')) + { + $self->{curInfo}->{image} = "http://film.onet.pl/" . $attr->{src}; + } + } + } + + # end + # Called each time a HTML tag ends. + # $tagname is the tag name. + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + } + + # text + # Called each time some plain text (between tags) is processed. + # $origtext is the read text. 
+ sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{listTitle}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext; + $self->{listTitle} = 0; + } + elsif ($self->{inside}->{list_date}) + { + ($self->{itemsList}[ $self->{itemIdx} ]->{date}) = ($origtext =~ m/,\s+(\d+)$/); + } + } + else + { + if ($self->{itemTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{itemTitle} = 0; + } + elsif ($self->{inside}->{item_country}) + { + ($self->{curInfo}->{original}) = ($origtext =~ m/(.*)\s+\(/); + ($self->{curInfo}->{country}, $self->{curInfo}->{date}) = + ($origtext =~ m/(\w+),\s+(\d+)\)/); + $origtext =~ s|/|, |g; + ($self->{curInfo}->{genre}) = ($origtext =~ m/\)(.*)/); + } + elsif ($self->{inside}->{item_time}) + { + ($self->{curInfo}->{time}, $self->{curInfo}->{age}) = + ($origtext =~ m/czas\s+(\d+).*\s+od\s+(\d+)/); + } + elsif ($self->{inside}->{item_director}) + { + $self->{curInfo}->{director} .= $origtext; + } + elsif ($self->{itemDescription}) + { + $self->{curInfo}->{synopsis} = $origtext; + $self->{itemDescription} = 0; + } + elsif ($self->{itemActor}) + { + $self->{curInfo}->{actors} .= + $self->{curInfo}->{actors} ? ", " . $origtext : $origtext; + $self->{itemActor} = 0; + } + elsif ($self->{inside}->{item_rating}) + { + ($self->{curInfo}->{ratingpress}) = int($origtext * 2 + 0.5); + } + } + } + + # new + # Constructor. + # Returns object reference. + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + + # This member should be initialized as a reference + # to a hash. Each keys is a field that could be + # in results with value 1 or 0 if it is returned + # or not. 
For the list of keys, check the model file + # (.gcm) and search for tags <field> in + # /collection/options/fields/results + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + }; + + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{title} = ''; + $self->{itemsList}[0]->{url} = ''; + + # Do your init stuff here + bless($self, $class); + return $self; + } + + # preProcess + # Called before each page is processed. You can use it to do some substitutions. + # $html is the page content. + # Returns modified version of page content. + sub preProcess + { + my ($self, $html) = @_; + + $html =~ s{<B>(.*?)</B>}{$1}gms; + + if ($self->{parsingList}) + { + + $html =~ s{<TD class=a2 width="100%">(.*?)</TD>} + {<list_title>$1</list_title>}gms; + $html =~ s{<FONT class=a0 color="#993300">(.*?)</FONT>} + {<list_date>$1</list_date>}gms; + } + else + { + $html =~ s{<BR>}{}g; + $html =~ s{<TD class=a2 valign=top width="100%">(.*?)<} + {<item_country>$1</item_country><}gms; + $html =~ s{<SPAN class=a1>(.*?)</SPAN>} + {<item_time>$1</item_time>}gms; + $html =~ s{Re.yseria: (.*?)Scenariusz} + {<item_director>$1</item_director>}gms; + $html =~ s{Re.yseria: (.*?)wi.cej} + {<item_director>$1</item_director>}gms; + $html =~ s{a2><A class=u} + {a2><A class=item_actor}gms; + $html =~ s{Ocena filmu.*([0-9]\.[0-9]+)/5} + {<item_rating>$1</item_rating>}gms; + } + return $html; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCPortHU.pm b/lib/gcstar/GCPlugins/GCfilms/GCPortHU.pm new file mode 100644 index 0000000..e460584 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCPortHU.pm @@ -0,0 +1,343 @@ +package GCPlugins::GCfilms::GCPortHU; + +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 

use strict;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Films plugin for the Hungarian port.hu site.
    package GCPlugins::GCfilms::GCPluginPortHU;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # start
    # Called for each opening HTML tag.
    # $tagname is the tag name, $attr a hash reference of its attributes.
    # On results pages it detects movie links; on item pages it tracks
    # which section (title, description, actors, synopsis) we are in.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingEnded})
        {
            return;
        }

        if ($self->{parsingList})
        {
            if ($tagname eq "a")
            {
                if ($attr->{href} =~ m:(/pls/fi/films.film_page.*):)
                {
                    # Only film links shown in bold are search results.
                    if ($self->{insideBoldText})
                    {
                        my $url = $1;
                        $self->{isMovie} = 1;
                        $self->{isInfo}  = 1;
                        $self->{itemIdx}++;
                        $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                    }
                    else
                    {
                        $self->{isMovie} = 0;
                        $self->{isInfo}  = 0;
                    }
                }
            }
            elsif ($tagname eq "span")
            {
                # isInfo counts the "txt" spans following a result:
                # 1 = original title expected, 2 = year/length expected.
                if ($attr->{class} eq "txt")
                {
                    $self->{isInfo}++
                      if $self->{isInfo};
                }
                $self->{insideBoldText} = ($attr->{class} eq "btxt");
            }
        }
        else
        {
            if ($tagname eq "img")
            {
                if (   ($attr->{class} eq "object_picture")
                    && (!$self->{curInfo}->{image}))
                {
                    $self->{curInfo}->{image}  = $attr->{src};
                    $self->{insideOtherTitles} = 0;
                    $self->{insideDescription} = 1;
                }
            }
            elsif ($tagname eq "div")
            {
                if (($attr->{class} eq "separator")
                    && $self->{insideActors})
                {
                    # The separator after the cast starts the synopsis.
                    $self->{insideActors}   = 0;
                    $self->{insideSynopsis} = 1;
                }
                elsif (($attr->{class} eq "object_picture")
                    && (!$self->{curInfo}->{image}))
                {
                    # Picture given as a CSS background. Only use the
                    # capture when the match succeeded: after a failed
                    # match $1 still holds the value of an earlier one.
                    if ($attr->{style} =~ m/url\(([^\)]*)\)/)
                    {
                        $self->{curInfo}->{image} = $1;
                    }
                    $self->{insideOtherTitles} = 0;
                    $self->{insideDescription} = 1;
                }
            }
            elsif ($tagname eq "span")
            {
                if ($attr->{class} eq "blackbigtitle")
                {
                    $self->{insideTitle} = 1;
                }
                elsif ($attr->{class} eq "btxt")
                {
                    $self->{insideBoldText} = 1;
                }
                else
                {
                    $self->{insideBoldText} = 0;
                }
                $self->{insideNormalText} = ($attr->{class} eq "txt");
            }
        }
    }

    # end
    # Called for each closing HTML tag; $tagname is the tag name.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # text
    # Called for each piece of plain text between tags.
    # $origtext is the text; chunks shorter than 2 characters are ignored.
    # Fills $self->{itemsList} (results page) or $self->{curInfo}
    # (item page) depending on the state set by start().
    sub text
    {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        # NOTE(review): the first two patterns below look entity-decoded
        # or re-encoded in this copy of the file (the first one replaces
        # a quote with itself) - confirm against upstream.
        $origtext =~ s/"/"/g;
        $origtext =~ s/³/3/g;
        $origtext =~ s/&#[0-9]*;//g;
        $origtext =~ s/\n//g;

        return if ($self->{parsingEnded});

        if ($self->{parsingList})
        {
            if ($self->{isMovie})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
                return;
            }
            if ($self->{isInfo} == 1)
            {
                # Original title, given in parentheses.
                if ($origtext =~ m/\((.*)\)/)
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{original} = $1;
                }
                $self->{isInfo} = 0
                  if $origtext =~ m/^ $/;
            }
            if ($self->{isInfo} == 2)
            {
                if ($origtext =~ m/([0-9]+)\)/)
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
                }
                if ($origtext =~ m/([0-9]+)\sperc/)
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{time} = $1;
                }
                $self->{isInfo} = 0;
            }
        }
        else
        {
            if ($self->{insideTitle})
            {
                $self->{curInfo}->{title}  = $origtext;
                $self->{insideTitle}       = 0;
                $self->{insideOtherTitles} = 1;
                $self->{insideDescription} = 1;
                return;
            }
            if (   $self->{insideOtherTitles}
                && $self->{insideNormalText})
            {
                if ($origtext =~ m/\((.*)\)/)
                {
                    $self->{curInfo}->{original} = $1;
                }
                $self->{insideOtherTitles} = 0;
                return;
            }
            if (   $self->{insideDescription}
                && $self->{insideBoldText})
            {
                if ($origtext =~ m/([0-9]+)\s+perc/)
                {
                    $self->{curInfo}->{time} = $1;
                }
                if ($origtext =~ m/([0-9]+)$/)
                {
                    $self->{curInfo}->{date} = $1;
                }
                # NOTE(review): the non-ASCII character of this pattern
                # is mis-encoded in this copy - confirm against upstream.
                if ($origtext =~ m/^([0-9]+)\s+�v/)
                {
                    $self->{curInfo}->{age} = $1;
                }
            }

            if ($origtext =~ m/^rendez/)
            {
                # Director label: the next text chunk is the name.
                $self->{insideDirector}    = 1;
                $self->{insideOtherTitles} = 0;
                $self->{insideDescription} = 0;
                return;
            }
            if ($self->{insideDirector})
            {
                $self->{curInfo}->{director} = $origtext;
                $self->{insideDirector} = 0;
                return;
            }

            if ($origtext =~ m/^szerepl/)
            {
                # Cast label: following links are actor names.
                $self->{insideActors} = 1;
                return;
            }
            if ($self->{insideActors})
            {
                if ($self->{inside}->{a})
                {
                    push @{$self->{curInfo}->{actors}}, [$origtext]
                      if ($self->{actorsCounter} <
                        $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS);
                    $self->{actorsCounter}++;
                    return;
                }
                elsif ($origtext =~ m/\((.*)\)/)
                {
                    # As we incremented it above, we have one more chance here to add a role
                    # Without <= we would skip the role for last actor.
                    # A role found before any actor was stored is ignored:
                    # index -1 on an empty list would be fatal.
                    push @{$self->{curInfo}->{actors}->[ $self->{actorsCounter} - 1 ]},
                      $1
                      if ( ($self->{actorsCounter} > 0)
                        && ($self->{actorsCounter} <=
                            $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS));
                }
            }

            # These bold headings mark the end of the useful content.
            # NOTE(review): the last alternative is mis-encoded in this
            # copy - confirm against upstream.
            if (   $origtext =~ m/^(Linkek|Bemutat|Aj�nl�k)/
                && $self->{insideBoldText})
            {
                $self->{parsingEnded}   = 1;
                $self->{insideSynopsis} = 0;
                return;
            }

            if (   $self->{insideSynopsis}
                && $self->{insideNormalText}
                && $self->{inside}->{span}
                && !$self->{inside}->{a})
            {
                # Append, then strip leading blanks of the whole synopsis.
                ($self->{curInfo}->{synopsis} .= $origtext) =~ s/^\s*//;
            }

        }
    }

    # new
    # Constructor. Declares the result fields this plugin returns and
    # resets the parser state.
    # Returns the object reference.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
            original => 1,
        };

        $self->{isInfo}            = 0;
        $self->{isMovie}           = 0;
        $self->{insideDescription} = 0;
        $self->{insideSynopsis}    = 0;
        $self->{insideActors}      = 0;
        $self->{curName}           = undef;
        $self->{curUrl}            = undef;

        return $self;
    }

    # preProcess
    # Called before each page is parsed: resets the per-page state and
    # cleans up markup.
    # $html is the page content.
    # Returns the modified page content.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;

        # NOTE(review): as shown here both patterns are identical, so
        # the second can never match after the first; they look
        # entity-decoded in this copy - confirm against upstream.
        $html =~ s/""/'"/g;
        $html =~ s/""/"'/g;
        $html =~ s|</a></b><br>|</a><br>|;

        $self->{insideDescription} = 0;
        $self->{insideSynopsis}    = 0;
        $self->{insideActors}      = 0;

        return $html;
    }

    # getSearchUrl
    # $word is the query.
    # Returns the URL of the search results page.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        my $base_url = 'http://www.port.hu/pls/ci/cinema.film_creator';
        return "$base_url?i_text=$word&i_film_creator=1";
    }

    # getItemUrl
    # $url is the site-relative URL found on a results page.
    # Returns the absolute URL.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return "http://www.port.hu$url";
    }

    # getCharset
    # Returns the charset of the pages.
    sub getCharset
    {
        my $self = shift;

        return "UTF-8";
    }

    # convertCharset
    # No conversion is done: $value is returned unchanged.
    sub convertCharset
    {
        my ($self, $value) = @_;
        return $value;
    }

    # getName
    # Returns the plugin name.
    sub getName
    {
        return "port.hu";
    }

    # getAuthor
    # Returns the plugin author name.
    sub getAuthor
    {
        return 'Anonymous';
    }

    # getLang
    # Returns the language of the site (2 letters code).
    sub getLang
    {
        return 'HU';
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCStopklatka.pm b/lib/gcstar/GCPlugins/GCfilms/GCStopklatka.pm new file mode 100644 index 0000000..8f4290e --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCStopklatka.pm @@ -0,0 +1,355 @@ +# +# More information here: http://wiki.gcstar.org/en/websites_plugins +# +# GCcollection should be replaced with the kind of collection your +# plugin deals with. e.g. GCfilms, GCgames, GCbooks,... + +# Replace SiteTemplate with your plugin name. +# The package name must exactly match the file name (.pm) +package GCPlugins::GCcollection::GCStopklatka; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + # Replace SiteTemplate with your exporter name + # It must be the same name as the one used for file and main package name + package GCPlugins::GCfilms::GCPluginStopklatka; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + # getSearchUrl + # Used to get the URL that to be used to perform searches. + # $word is the query + # Returns the full URL.
+ sub getSearchUrl + { + my ($self, $word) = @_; + + # Your code here + + return "http://www.stopklatka.pl/szukaj/szukaj.asp?kategoria=film&szukaj=$word"; + } + + # getItemUrl + # Used to get the full URL of an item page. + # Useful when url on results pages are relative. + # $url is the URL as found with a search. + # Returns the absolute URL. + sub getItemUrl + { + my ($self, $url) = @_; + + # Your code here + + return "http://www.stopklatka.pl/" . $url; + } + + # getCharset + # Used to convert charset in web pages. + # Returns the charset as specified in pages. + sub getCharset + { + my $self = shift; + + return "ISO-8859-2"; + } + + # getName + # Used to display plugin name in GUI. + # Returns the plugin name. + sub getName + { + return "Stopklatka"; + } + + # getAuthor + # Used to display the plugin author in GUI. + # Returns the plugin author name. + sub getAuthor + { + return 'Marek Cendrowicz'; + } + + # getLang + # Used to fill in plugin list with user language plugins + # Return the language used for this site (2 letters code). + sub getLang + { + return 'PL'; + } + # getExtra + # Used if the plugin wants an extra column to be displayed in search results + # Return the column title or empty string to hide the column. + sub getExtra + { + return ""; + } + + # changeUrl + # Can be used to change URL if item URL and the one used to + # extract information are different. + # Return the modified URL. + sub changeUrl + { + my ($self, $url) = @_; + + return $url; + } + + # In processing functions below, self->{parsingList} can be used. + # If true, we are processing a search results page + # If false, we are processing a item information page. + + # $self->{inside}->{tagname} (with correct value for tagname) can be used to test + # if we are in the corresponding tag. + + # You have a counter $self->{itemIdx} that have to be used when processing search results. + # It is your responsability to increment it! 
+ + # When processing search results, you have to fill the available fields for results + # + # $self->{itemsList}[$self->{movieIdx}]->{field_name} + # + # When processing a movie page, you need to fill the fields (if available) + # in $self->{curInfo}. + # + # $self->{curInfo}->{field_name} + + # start + # Called each time a new HTML tag begins. + # $tagname is the tag name. + # $attr is reference to an associative array of tag attributes. + # $attrseq is an array reference containing all the attributes name. + # $origtext is the tag text as found in source file + # Returns nothing + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($self->{inside}->{list_details}) + { + if ($tagname eq 'a') + { + $self->{itemIdx}++; + $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href}; + } + + $self->{listTitle} = 1 + if ($attr->{class} eq 'searchTitle textB'); + + $self->{listYear} = 1 + if ($attr->{class} eq 'searchTitle'); + } + } + else + { + if ($attr->{class} eq 'wydarzenie_tytul') + { + $self->{pre_itemTitle} = 1; + } + elsif ($self->{pre_itemTitle}) + { + if ($tagname eq 'h1') + { + $self->{itemTitle} = 1; + } + elsif ($tagname eq 'h2') + { + $self->{itemOriginalTitle} = 1; + } + elsif ($tagname eq 'table') + { + $self->{pre_itemTitle} = 0; + } + } + elsif ($attr->{class} eq 'film_pozycja') + { + $self->{pre_itemDetails} = 1; + } + elsif ($attr->{class} eq 'main_sub_table film') + { + $self->{itemMain} = 1; + } + elsif ($tagname eq 'img' && $self->{itemMain}) + { + $self->{curInfo}->{image} = $attr->{src}; + } + } + } + + # end + # Called each time a HTML tag ends. + # $tagname is the tag name. + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + } + + # text + # Called each time some plain text (between tags) is processed. + # $origtext is the read text. 
+ sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{listTitle}) + { + $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext; + $self->{listTitle} = 0; + } + elsif ($self->{listYear}) + { + $origtext =~ s/\((.*?)\)/$1/gms; + $self->{itemsList}[ $self->{itemIdx} ]->{date} = $origtext; + $self->{listYear} = 0; + } + } + else + { + if ($self->{itemTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{itemTitle} = 0; + } + elsif ($self->{itemOriginalTitle}) + { + $origtext =~ s/\s*\((.*?)\)/$1/gms; + $self->{curInfo}->{original} = $origtext; + $self->{itemOriginalTitle} = 0; + } + elsif ($self->{itemDirector}) + { + $self->{curInfo}->{director} = $origtext; + $self->{itemDirector} = 0; + $self->{pre_itemDetails} = 0; + } + elsif ($self->{itemDetails}) + { + ( + $self->{curInfo}->{genre}, $self->{curInfo}->{country}, + $self->{curInfo}->{date}, $self->{curInfo}->{time} + ) = split(/, /, $origtext); + $self->{curInfo}->{time} =~ s/ min//; + $self->{itemDetails} = 0; + $self->{pre_itemDetails} = 0; + } + elsif ($self->{itemActors}) + { + $self->{curInfo}->{actors} = $origtext; + $self->{itemActors} = 0; + $self->{pre_itemDetails} = 0; + } + elsif ($self->{itemMain}) + { + $self->{curInfo}->{synopsis} = $origtext; + $self->{itemMain} = 0; + } + elsif ($self->{inside}->{item_rank}) + { + $self->{curInfo}->{ratingpress} = int($origtext + 0.5); + $self->{pre_itemDetails} = 0; + } + elsif ($self->{pre_itemDetails}) + { + $self->{itemDirector} = 1 + if ($origtext eq 'reżyseria:'); + + $self->{itemDetails} = 1 + if ($origtext eq 'szczegóły:'); + + $self->{itemActors} = 1 + if ($origtext eq 'obsada:'); + } + } + } + + # new + # Constructor. + # Returns object reference. + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + + # This member should be initialized as a reference + # to a hash. 
Each keys is a field that could be + # in results with value 1 or 0 if it is returned + # or not. For the list of keys, check the model file + # (.gcm) and search for tags <field> in + # /collection/options/fields/results + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + }; + + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{title} = ''; + $self->{itemsList}[0]->{url} = ''; + + # Do your init stuff here + bless($self, $class); + return $self; + } + + # preProcess + # Called before each page is processed. You can use it to do some substitutions. + # $html is the page content. + # Returns modified version of page content. + sub preProcess + { + my ($self, $html) = @_; + + # Your code to modify $html here. + if ($self->{parsingList}) + { + $html =~ + s|<!-- record_start -->(.*?)<!-- record_end -->|<list_details>$1</list_details>|gms; + } + else + { + $html =~ s|<nobr>(.*?)</nobr>|$1|gms; + $html =~ s|<span class="bold">(.*?)</span>|$1|gms; + $html =~ s|<a href="/filmowcy/osoba.*?">(.*?)</a>|$1|gms; + $html =~ +s|script type="text/javascript">document.write\(getOcena\((.*?)\)\);</script>|<item_rank>$1</item_rank>|gms; + } + return $html; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCThemoviedb.pm b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedb.pm new file mode 100644 index 0000000..f7636cc --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedb.pm @@ -0,0 +1,337 @@ +package GCPlugins::GCfilms::GCthemoviedb; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    # Films plugin using the XML API of themoviedb.org (version 2.1).
    # Language specific plugins subclass it and override siteLanguage.
    package GCPlugins::GCfilms::GCPluginThemoviedb;

    use base 'GCPlugins::GCfilms::GCfilmsPluginsBase';
    use XML::Simple;

    # _apiUrl
    # Builds the common part of an API request URL.
    # $method is the API method name (e.g. "Movie.getInfo").
    # Returns the URL up to and including the final '/', ready to have
    # a movie id or a search query appended.
    sub _apiUrl
    {
        my ($self, $method) = @_;

        return "http://api.themoviedb.org/2.1/$method/"
          . $self->siteLanguage()
          . '/xml/9fc8c3894a459cac8c75e3284b712dfc/';
    }

    # parse
    # Replaces HTML parsing: the pages are XML documents.
    # $page is the document returned by the API. Documents starting
    # with an HTML doctype are ignored.
    # Fills $self->{itemsList} (search results) or $self->{curInfo}
    # (movie information) depending on $self->{parsingList}.
    sub parse
    {
        my ($self, $page) = @_;

        return if $page =~ /^<!DOCTYPE html/;

        my $xs = XML::Simple->new;

        if ($self->{parsingList})
        {
            $self->_parseList($xs, $page);
        }
        else
        {
            $self->_parseItem($xs, $page);
        }
        return;
    }

    # _parseList
    # Extracts the search results from $page using the XML::Simple
    # object $xs. Each movie gives one entry, plus one entry per
    # alternative name.
    sub _parseList
    {
        my ($self, $xs, $page) = @_;

        return if $page =~ m/>Nothing found\.<\/movie/;

        my $xml = $xs->XMLin(
            $page,
            ForceArray => [ 'movie', 'alternative_name' ],
            KeyAttr    => ['id']
        );

        foreach my $id (keys(%{$xml->{movies}->{movie}}))
        {
            my $movie = $xml->{movies}->{movie}->{$id};

            # We only want movies, not series and everything else the api returns
            next if $movie->{type} ne 'movie';

            my $url = $self->_apiUrl('Movie.getInfo') . $id;

            # A missing release date comes back as a reference instead
            # of a string, so only keep it when it is a plain value.
            my $released = ref($movie->{released}) ? '' : $movie->{released};

            # The main name first, then every alternative name as an
            # additional search result pointing to the same movie.
            my @titles = ($movie->{name});
            push @titles, grep { !ref($_) } @{$movie->{alternative_name}};

            foreach my $title (@titles)
            {
                $self->{itemIdx}++;
                $self->{itemsList}[ $self->{itemIdx} ]->{date}  = $released;
                $self->{itemsList}[ $self->{itemIdx} ]->{url}   = $url;
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $title;
            }
        }
        return;
    }

    # _parseItem
    # Extracts the information of one movie from $page using the
    # XML::Simple object $xs.
    sub _parseItem
    {
        my ($self, $xs, $page) = @_;

        # 'image' is forced to an array too: a movie with exactly one
        # image would otherwise be returned as a hash and make the
        # image loop below fail.
        my $xml = $xs->XMLin(
            $page,
            ForceArray =>
              [ 'country', 'person', 'category', 'size', 'alternative_name', 'image' ],
            KeyAttr => ['']
        );
        my $movie  = $xml->{movies}->{movie};
        my $wanted = $self->{itemsList}[ $self->{wantedIdx} ]->{title};

        if (($movie->{name} ne $wanted) && $wanted)
        {
            # Name returned by tmdb is different to the one the user selected
            # this means they chose a translated name, so use the name they chose
            # as the default, and put tmdb's name in as the original (untranslated) name of the movie
            $self->{curInfo}->{title}    = $wanted;
            $self->{curInfo}->{original} = $movie->{name};
        }
        else
        {
            $self->{curInfo}->{title} = $movie->{name};
        }

        # Now, add any alternate names
        for my $alternateName (@{$movie->{alternative_name}})
        {
            next if ref($alternateName);
            next if $alternateName eq $self->{curInfo}->{title};

            $self->{curInfo}->{original} .= ", "
              if $self->{curInfo}->{original};
            $self->{curInfo}->{original} .= $alternateName;
        }

        $self->{curInfo}->{webPage} = $movie->{url};

        # The following fields could be missing from the xml, in which
        # case they come back as a reference instead of a string.
        $self->{curInfo}->{synopsis} = $movie->{overview}
          if (!ref($movie->{overview}));
        $self->{curInfo}->{ratingpress} = $movie->{rating}
          if (!ref($movie->{rating}));
        $self->{curInfo}->{date} = $movie->{released}
          if (!ref($movie->{released}));
        $self->{curInfo}->{time} = $movie->{runtime} . " mins"
          if (!ref($movie->{runtime}));

        if (!ref($movie->{certification}))
        {
            # Minimum age associated with each certification.
            my %ageFor = (
                'Unrated'  => 1,
                'Open'     => 1,
                'G'        => 2,
                'Approved' => 2,
                'PG'       => 5,
                'M'        => 5,
                'GP'       => 5,
                'PG-13'    => 13,
                'R'        => 17,
                'NC-17'    => 18,
                'X'        => 18,
            );
            my $certification = $movie->{certification};
            $self->{curInfo}->{age} = $ageFor{$certification}
              if exists $ageFor{$certification};
        }

        for my $country (@{$movie->{countries}->{country}})
        {
            push @{$self->{curInfo}->{country}}, $country->{name};
        }

        for my $person (@{$movie->{cast}->{person}})
        {
            my $name = $person->{name};
            # Strip any blank spaces from start and end of name
            $name =~ s/\s*$//;
            $name =~ s/^\s*//;
            if ($person->{job} eq "Director")
            {
                $self->{curInfo}->{director} .= $name . ', ';
            }
            elsif ($person->{job} eq "Actor")
            {
                if ($self->{actorsCounter} < $GCPlugins::GCfilms::GCfilmsCommon::MAX_ACTORS)
                {
                    # Each actor is stored as [ name, role ].
                    push @{$self->{curInfo}->{actors}}, [$name];
                    my $role = $person->{character};
                    $role =~ s/\s*$//;
                    $role =~ s/^\s*//;
                    push @{$self->{curInfo}->{actors}->[ $self->{actorsCounter} ]}, $role;
                    $self->{actorsCounter}++;
                }
            }
        }
        # Remove the separator left after the last director.
        $self->{curInfo}->{director} =~ s/, $//;

        for my $category (@{$movie->{categories}->{category}})
        {
            push @{$self->{curInfo}->{genre}}, [ $category->{name} ]
              if ($category->{type} eq 'genre');
        }

        # Keep the first poster of the requested size:
        # the big original pic, or just the small thumbnail pic.
        my $wantedSize = $self->{bigPics} ? 'original' : 'thumb';
        for my $image (@{$movie->{images}->{image}})
        {
            next if $image->{type} ne 'poster';
            next if $image->{size} ne $wantedSize;

            $self->{curInfo}->{image} = $image->{url}
              if !$self->{curInfo}->{image};
        }
        return;
    }

    # new
    # Constructor. Declares the fields returned in search results.
    # Returns the object reference.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
        };

        return $self;
    }

    # getItemUrl
    # $url is an item URL, either an API URL or the address of a movie
    # page of the web site.
    # Returns the API URL for the movie; without $url, returns the site
    # address as a hint of the kind of addresses this plugin handles.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        if (!$url)
        {
            # If we're not passed a url, return a hint so that gcstar knows what type
            # of addresses this plugin handles
            $url = "http://www.themoviedb.org";
        }
        elsif (index($url, "api.themoviedb.org") < 0)
        {
            # Url isn't for the movie db api (checking the host, not just
            # the substring "api" which may appear anywhere in a URL), so
            # take the movie id after the last '/' and return the url of
            # the api page for this movie
            my $slash = rindex($url, "/");
            if ($slash >= 0)
            {
                $url = $self->_apiUrl('Movie.getInfo') . substr($url, $slash + 1);
            }
        }
        return $url;
    }

    # preProcess
    # Nothing to change: $html is returned as received.
    sub preProcess
    {
        my ($self, $html) = @_;

        return $html;
    }

    # decodeEntitiesWanted
    # Returns 0.
    sub decodeEntitiesWanted
    {
        return 0;
    }

    # getSearchUrl
    # $word is the query.
    # Returns the URL of the API search request.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return $self->_apiUrl('Movie.search') . $word;
    }

    # changeUrl
    # Returns the API URL corresponding to $url.
    sub changeUrl
    {
        my ($self, $url) = @_;
        # Make sure the url is for the api, not the main movie page
        return $self->getItemUrl($url);
    }

    # siteLanguage
    # Returns the language code inserted in API URLs.
    sub siteLanguage
    {
        my $self = shift;

        return 'en';
    }

    # getName
    # Returns the plugin name.
    sub getName
    {
        return "The Movie DB";
    }

    # getAuthor
    # Returns the plugin author name.
    sub getAuthor
    {
        return 'Zombiepig';
    }

    # getLang
    # Returns the language of the site (2 letters code).
    sub getLang
    {
        return 'EN';
    }

    # getCharset
    # Returns the charset of the documents.
    sub getCharset
    {
        my $self = shift;

        return "UTF-8";
    }

    # getSearchCharset
    # Returns the charset used for search URLs.
    sub getSearchCharset
    {
        my $self = shift;

        # Need urls to be double character encoded
        return "utf8";
    }

    # convertCharset
    # No conversion is done: $value is returned unchanged.
    sub convertCharset
    {
        my ($self, $value) = @_;
        return $value;
    }

    # getNotConverted
    # Returns a reference to an empty list.
    sub getNotConverted
    {
        my $self = shift;
        return [];
    }

    # isPreferred
    # Returns 1.
    sub isPreferred
    {
        return 1;
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbDE.pm b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbDE.pm new file mode 100644 index 0000000..bc55111 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbDE.pm @@ -0,0 +1,56 @@ +package GCPlugins::GCfilms::GCthemoviedbDE; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCfilms::GCThemoviedb;
+
+{
+    package GCPlugins::GCfilms::GCPluginThemoviedbDE;
+
+    use base 'GCPlugins::GCfilms::GCPluginThemoviedb';
+    use XML::Simple;
+
+    # Language code; the URL builders inherited from the parent class call
+    # siteLanguage() to fill the language part of the API address
+    sub siteLanguage
+    {
+        return 'de';
+    }
+
+    # Name of this plugin variant
+    sub getName
+    {
+        return "The Movie DB (DE)";
+    }
+
+    # Upper-case language code of this plugin variant
+    sub getLang
+    {
+        return 'DE';
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbES.pm b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbES.pm
new file mode 100644
index 0000000..77dc03a
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbES.pm
@@ -0,0 +1,56 @@
+package GCPlugins::GCfilms::GCthemoviedbES;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCfilms::GCThemoviedb;
+
+{
+    package GCPlugins::GCfilms::GCPluginThemoviedbES;
+
+    use base 'GCPlugins::GCfilms::GCPluginThemoviedb';
+    use XML::Simple;
+
+    # Language code; the URL builders inherited from the parent class call
+    # siteLanguage() to fill the language part of the API address
+    sub siteLanguage
+    {
+        return 'es';
+    }
+
+    # Name of this plugin variant
+    sub getName
+    {
+        return "The Movie DB (ES)";
+    }
+
+    # Upper-case language code of this plugin variant
+    sub getLang
+    {
+        return 'ES';
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbFR.pm b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbFR.pm
new file mode 100644
index 0000000..624b64f
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCThemoviedbFR.pm
@@ -0,0 +1,56 @@
+package GCPlugins::GCfilms::GCthemoviedbFR;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCfilms::GCThemoviedb;
+
+{
+    package GCPlugins::GCfilms::GCPluginThemoviedbFR;
+
+    use base 'GCPlugins::GCfilms::GCPluginThemoviedb';
+    use XML::Simple;
+
+    # Language code; the URL builders inherited from the parent class call
+    # siteLanguage() to fill the language part of the API address
+    sub siteLanguage
+    {
+        return 'fr';
+    }
+
+    # Name of this plugin variant
+    sub getName
+    {
+        return "The Movie DB (FR)";
+    }
+
+    # Upper-case language code of this plugin variant
+    sub getLang
+    {
+        return 'FR';
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCfilmsAmazonCommon.pm b/lib/gcstar/GCPlugins/GCfilms/GCfilmsAmazonCommon.pm
new file mode 100644
index 0000000..a7178fa
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCfilmsAmazonCommon.pm
@@ -0,0 +1,59 @@
+package GCPlugins::GCfilms::GCfilmsAmazonCommon;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+use GCPlugins::GCstar::GCAmazonCommon;
+
+{
+    package GCPlugins::GCfilms::GCfilmsAmazonPluginsBase;
+
+    use base qw(
+      GCPlugins::GCfilms::GCfilmsPluginsBase
+      GCPlugins::GCstar::GCPluginAmazonCommon
+    );
+
+    # Constructor: builds the object through SUPER::new(), reblesses it
+    # into the calling class and presets codeField (empty) and
+    # searchType ('dvd').
+    sub new
+    {
+        my ($invocant) = @_;
+        my $class = ref($invocant) || $invocant;
+
+        my $self = bless($class->SUPER::new(), $class);
+        $self->{codeField}  = '';
+        $self->{searchType} = 'dvd';
+
+        return $self;
+    }
+
+    # Reference to the list of search field names: only 'title'
+    sub getSearchFieldsArray
+    {
+        return ['title'];
+    }
+
+    # Field name returned for EAN lookups: 'title'
+    sub getEanField
+    {
+        return 'title';
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCfilmsCommon.pm b/lib/gcstar/GCPlugins/GCfilms/GCfilmsCommon.pm
new file mode 100644
index 0000000..d67952f
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCfilmsCommon.pm
@@ -0,0 +1,70 @@
+package GCPlugins::GCfilms::GCfilmsCommon;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+# Limits read by the film plugins through their fully qualified names
+our $MAX_ACTORS    = 30;
+our $MAX_DIRECTORS = 4;
+
+use GCPlugins::GCPluginsBase;
+
+{
+    package GCPlugins::GCfilms::GCfilmsPluginsBase;
+
+    use base 'GCPluginParser';
+
+    # Constructor: the object built by the parent constructor, reblessed
+    # into the calling class.
+    sub new
+    {
+        my ($invocant) = @_;
+        my $class = ref($invocant) || $invocant;
+
+        return bless($class->SUPER::new(), $class);
+    }
+
+    # Reference to the list of search field names: only 'title'
+    sub getSearchFieldsArray
+    {
+        return ['title'];
+    }
+
+    # Load $url through the parent class with both person counters reset
+    # to zero.  When the result has an original title but no title, the
+    # original title is moved into the title field.  Returns the curInfo
+    # hash reference.
+    sub loadUrl
+    {
+        my ($self, $url) = @_;
+
+        @{$self}{qw(actorsCounter directorCounter)} = (0, 0);
+        $self->SUPER::loadUrl($url);
+
+        if (!$self->{curInfo}{title} && $self->{curInfo}{original})
+        {
+            $self->{curInfo}{title}    = $self->{curInfo}{original};
+            $self->{curInfo}{original} = '';
+        }
+
+        return $self->{curInfo};
+    }
+
+}
+
+1; |