package GCPlugins::GCfilms::GCImdb;

###################################################
#
#  Features:
#  + Multiple directors separated by comma
#  + Multiple countries separated by comma
#  + Correct URL in case of redirection
#  + Fetches Original Title
#
###################################################
#
#  Copyright 2005-2014 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCfilms::GCfilmsCommon;

{
    package GCPlugins::GCfilms::GCPluginImdb;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Maps the title attribute of a US certificate image to the age value
    # stored in curInfo->{age}. Certificates that are not listed leave the
    # age untouched.
    my %AGE_FOR_US_CERTIFICATE = (
        'Unrated'  => 1,
        'Open'     => 1,
        'G'        => 2,
        'Approved' => 2,
        'PG'       => 5,
        'M'        => 5,
        'GP'       => 5,
        'PG_13'    => 13,
        'R'        => 17,
        'NC_17'    => 18,
        'X'        => 18,
    );

    # Opening-tag handler.
    #
    # Arguments: tag name, hash ref of attributes, then the attribute
    # sequence and original text (both unused here).
    # NOTE(review): presumably called by the base class's HTML parser for
    # every opening tag - confirm against GCfilmsPluginsBase.
    #
    # In list mode (parsingList set) it registers every not-yet-seen link
    # whose href starts with /title/ as a new candidate in itemsList.
    # In item mode it raises the inside* flags that text() uses to decide
    # where a piece of text belongs, and reads values that are carried by
    # attributes (canonical URL, poster image, US certificate).
    sub start {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        # Per-tag nesting counter, decremented again in end().
        $self->{inside}->{$tagname}++;

        if ($self->{parsingEnded}) {
            return;
        }

        if ($self->{parsingList}) {
            if ($tagname eq "a") {
                my $url = $attr->{href};
                if (($url =~ /^\/title\//) && (!$self->{alreadyListed}->{$url})) {
                    # The next text node is taken as this entry's title;
                    # end() drops the entry again if no title showed up.
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 1;
                    $self->{itemIdx}++;
                    $self->{itemsList}[$self->{itemIdx}]->{url} = $url;
                    $self->{alreadyListed}->{$url} = 1;
                }
            }
        }
        else {
            if ($tagname eq "link") {
                if ($attr->{rel} eq "canonical") {
                    $self->{curInfo}->{webPage} = $attr->{href};
                }
            }
            elsif ($tagname eq "h1") {
                if ($attr->{class} eq "header") {
                    $self->{insideHeader} = 1;
                }
            }
            elsif ($tagname eq "div") {
                if ($attr->{class} eq "infobar") {
                    $self->{insideInfobar} = 1;
                }
            }
            elsif ($tagname eq "table") {
                if ($attr->{class} eq "cast_list") {
                    $self->{insideCastList} = 1;
                }
            }
            elsif ($tagname eq "span") {
                if ($attr->{itemprop} eq "ratingValue") {
                    $self->{insideRating} = 1;
                }
                elsif ($attr->{class} eq "title-extra") {
                    $self->{insideOriginalTitle} = 1;
                }
                elsif ($self->{insideCastList}) {
                    if ($attr->{itemprop} eq 'name') {
                        $self->{insideActor} = 1;
                    }
                }
            }
            elsif ($tagname eq "img") {
                if ($self->{insidePrimaryImage}) {
                    if (!($attr->{src} =~ m/nopicture/)) {
                        # Store the image URL, rewriting the size suffix
                        # that follows "_V1._" to ask for a 1000x1000 box.
                        ($self->{curInfo}->{image} = $attr->{src}) =~ s/_V1\._.+\./_V1\._SX1000_SY1000_\./;
                    }
                }
                elsif ($self->{insideInfobar} && $attr->{src} =~ m|/certificates/us/|) {
                    my $cert = $attr->{title};
                    if (exists $AGE_FOR_US_CERTIFICATE{$cert}) {
                        $self->{curInfo}->{age} = $AGE_FOR_US_CERTIFICATE{$cert};
                    }
                }
            }
            elsif ($tagname eq "a") {
                if ($self->{insideHeader} && $attr->{href} =~ m/year/) {
                    $self->{insideYear} = 1;
                }
                elsif ($self->{insideInfobar} && $attr->{href} =~ m/genre/) {
                    $self->{insideGenre} = 1;
                }
            }
            elsif ($tagname eq 'td') {
                if ($self->{insideCastList}) {
                    #if ($attr->{class} eq 'name')
                    #{
                    #    $self->{insideActor} = 1;
                    #}
                    if ($attr->{class} eq 'character') {
                        $self->{insideRole} = 1;
                    }
                }
                elsif ($attr->{id} eq "img_primary") {
                    $self->{insidePrimaryImage} = 1;
                }
            }
        }
    }

    # Closing-tag handler.
    #
    # Argument: tag name.
    #
    # In list mode it discards a candidate link that closed without
    # producing a title. In item mode it lowers the inside* flags for the
    # closed tag; any closing tag other than a/div/span/table/td/h1 seen
    # inside the cast list flushes the collected actor/role pair into
    # curInfo->{actors}.
    sub end {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;

        if ($self->{parsingList}) {
            if ($self->{isMovie} && ($tagname eq 'a')) {
                $self->{isMovie} = 0;
                my $url = $self->{itemsList}[$self->{itemIdx}]->{url};
                if (!$self->{itemsList}[$self->{itemIdx}]->{title}) {
                    # No text inside the link: forget it so that a later
                    # link to the same URL can be listed.
                    $self->{alreadyListed}->{$url} = 0;
                    $self->{itemIdx}--;
                }
            }
        }
        else {
            if ($tagname eq "h1") {
                $self->{insideHeader} = 0;
            }
            elsif ($tagname eq "a") {
                $self->{insideYear}  = 0;
                $self->{insideGenre} = 0;
                $self->{insideActor} = 0;
                $self->{insideRole}  = 0;
            }
            elsif ($tagname eq "div") {
                $self->{insideInfobar}     = 0;
                $self->{insideNat}         = 0;
                $self->{insideDirector}    = 0;
                $self->{insideStoryline}   = 0;
                $self->{insideReleaseDate} = 0;
            }
            elsif ($tagname eq "span") {
                $self->{insideRating}        = 0;
                $self->{insideOriginalTitle} = 0;
            }
            elsif ($tagname eq "table") {
                $self->{insideCastList} = 0;
            }
            elsif ($tagname eq "td") {
                $self->{insidePrimaryImage} = 0;
            }
            elsif ($self->{insideCastList}) {
                if ($self->{actor} && $self->{role}) {
                    # Trim and collapse whitespace in both parts.
                    $self->{actor} =~ s/^\s+|\s+$//g;
                    $self->{actor} =~ s/\s{2,}/ /g;
                    $self->{role} =~ s/^\s+|\s+$//g;
                    $self->{role} =~ s/\s{2,}/ /g;

                    # Push the pair as one entry instead of addressing the
                    # new entry through actorsCounter: the actors list is
                    # deliberately not reset in preProcess(), so the
                    # counter is not guaranteed to equal the index of the
                    # entry just added.
                    push @{$self->{curInfo}->{actors}}, [$self->{actor}, $self->{role}];
                    $self->{actorsCounter}++;
                }
                $self->{actor} = "";
                $self->{role}  = "";
            }
        }
    }

    # Text handler.
    #
    # Argument: the raw text chunk.
    #
    # Chunks shorter than two characters are ignored; longer ones are
    # trimmed and then routed according to the flags set by start()/end().
    # NOTE(review): the length filter runs before trimming and also drops
    # one-character titles - confirm whether that is intended.
    sub text {
        my ($self, $origtext) = @_;

        return if length($origtext) < 2;

        $origtext =~ s/^\s+|\s+$//g;

        return if ($self->{parsingEnded});

        if ($self->{parsingList}) {
            #if ($self->{inside}->{h1} && $origtext !~ m/IMDb\s*Title\s*Search/i)
            if ($self->{inside}->{title} && $origtext !~ m/Find\s-\sIMDb/i) {
                # The page title is not the search page's: the loaded page
                # itself becomes the single result (see "Correct URL in
                # case of redirection" in the file header).
                $self->{parsingEnded} = 1;
                $self->{itemIdx}      = 0;
                $self->{itemsList}[0]->{url} = $self->{loadedUrl};
            }

            if ($self->{isMovie}) {
                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
                $self->{isMovie} = 0;
                $self->{isInfo}  = 1;
                return;
            }

            if ($self->{isInfo}) {
                # Text following the title: pick up a "(1999)" or
                # "(1999/II)" style year if there is one.
                $self->{itemsList}[$self->{itemIdx}]->{date} = $1
                    if $origtext =~ m|\(([0-9]*)(/I+)?\)|;
                $self->{isInfo} = 0;
            }
        }
        else {
            if ($self->{insideHeader}) {
                if ($self->{insideYear}) {
                    $self->{curInfo}->{date} = $origtext;
                }
                elsif (!$self->{curInfo}->{title}) {
                    $self->{curInfo}->{title} = $origtext;
                    if (!$self->{curInfo}->{original}) {
                        $self->{curInfo}->{original} = $origtext;
                    }
                }
                elsif ($self->{insideOriginalTitle} && !$self->{inside}->{i}) {
                    $self->{curInfo}->{original} = $origtext;
                }
            }
            elsif ($self->{insideInfobar}) {
                if ($self->{insideGenre}) {
                    if ($self->{curInfo}->{genre}) {
                        $self->{curInfo}->{genre} .= ",";
                    }
                    $self->{curInfo}->{genre} .= $origtext;
                }
                elsif ($origtext =~ m/([0-9]+ min)/) {
                    $self->{curInfo}->{time} = $1;
                }
            }
            elsif ($self->{insideRating} && $origtext =~ m/[0-9]\.[0-9]/) {
                # Rounded to the nearest integer.
                $self->{curInfo}->{ratingpress} = int($origtext + 0.5);
            }
            elsif ($self->{insideSynopsis}) {
                # NOTE(review): insideSynopsis is never set in this file.
                $self->{curInfo}->{synopsis} .= $origtext;
            }
            elsif ($self->{insideNat}) {
                if ($origtext =~ m/[^\s].+/) {
                    if ($self->{curInfo}->{country} =~ m/.+/) {
                        $self->{curInfo}->{country} .= ", ".$origtext;
                    }
                    else {
                        $self->{curInfo}->{country} = $origtext;
                    }
                }
            }
            elsif ($self->{insideCastList}) {
                if ($self->{insideActor}) {
                    $self->{actor} .= $origtext;
                }
                elsif ($self->{insideRole}) {
                    $self->{role} .= $origtext;
                }
            }
            elsif ($self->{insideStoryline} && $self->{inside}{p}) {
                # Only the first paragraph text after the heading is kept.
                $self->{curInfo}->{synopsis} = $origtext;
                $self->{insideStoryline} = 0;
            }
            elsif ($self->{insideDirector} && $self->{inside}->{div}) {
                # Only the first comma of the chunk gets a space added.
                $origtext =~ s/,/, /;
                $self->{curInfo}->{director} .= $origtext;
            }
            elsif ($self->{insideReleaseDate} && !$self->{curInfo}->{date}) {
                if ($origtext =~ m/([0-9]{4})/) {
                    $self->{curInfo}->{date} = $1;
                    $self->{insideReleaseDate} = 0;
                }
            }

            # Section headings switch on the flag for the section that
            # follows them.
            if ($self->{inside}->{h2}) {
                $self->{insideStoryline} = 1 if ($origtext eq "Storyline");
            }
            elsif ($self->{inside}->{h4}) {
                $self->{insideDirector} = 1 if $origtext =~ m/Directors?:/;
                # NOTE(review): insideTime is set here but not read
                # anywhere in this file.
                $self->{insideTime}        = 1 if $origtext =~ m/Runtime:/;
                $self->{insideNat}         = 1 if $origtext =~ m/Country:/;
                $self->{insideReleaseDate} = 1 if $origtext =~ m/Release Date:/;
            }
        }
    }

    # Constructor: builds the object through the parent class, re-blesses
    # it into the requested class and initialises the plugin state.
    sub new {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        # NOTE(review): presumably tells the framework which fields the
        # search result list provides - confirm against the base class.
        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
        };

        $self->{isInfo}  = 0;
        $self->{isMovie} = 0;
        $self->{curName} = undef;
        $self->{curUrl}  = undef;

        return $self;
    }

    # Resets the per-page parsing state and returns the HTML unchanged.
    #
    # Argument: the page HTML. Returns: the same HTML.
    sub preProcess {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;

        if ($self->{parsingList}) {
            $self->{alreadyListed} = {};
        }
        else {
            #$html =~ s|[^<]*||gi;
            #$html =~ s|]*>([^<]*)|$1|gi;
            #$html =~ s|([^<]*)|$1|gi;
            #$html =~ s|([^<]*)|$1|gi;

            # Commented out this line, causes bug #14420 when importing from named lists
            #$self->{curInfo}->{actors} = [];
        }

        return $html;
    }

    # Returns the IMDb title-search URL for the given search word.
    sub getSearchUrl {
        my ($self, $word) = @_;

        return "http://www.imdb.com/find?s=tt&q=$word";
    }

    # Turns a URL taken from a page into an absolute one.
    #
    # An empty URL yields the site root. URLs that already carry an http:
    # or https: scheme are returned unchanged; https is accepted so that
    # such absolute URLs are not prefixed with the host a second time.
    # Anything else is treated as a path on www.imdb.com.
    sub getItemUrl {
        my ($self, $url) = @_;

        return "http://www.imdb.com" if $url eq "";
        return $url if $url =~ /^https?:/;
        return "http://www.imdb.com".$url;
    }

    # Name of the plugin.
    sub getName {
        return "IMDb";
    }

    # Author of the plugin.
    sub getAuthor {
        return 'groms';
    }

    # Language code of the site.
    sub getLang {
        return 'EN';
    }
}

1;