From 126bb8cb6b93240bb4d3a2b816b74c286c3d422b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 6 Jul 2014 15:20:38 +0200 Subject: Imported Upstream version 1.7.0 --- lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm | 386 ++++++++++++++++++++++++++++ 1 file changed, 386 insertions(+) create mode 100644 lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm (limited to 'lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm') diff --git a/lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm b/lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm new file mode 100644 index 0000000..d950395 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCKinopoisk.pm @@ -0,0 +1,386 @@ +package GCPlugins::GCfilms::GCKinopoisk; + +use strict; +use utf8; +use Encode qw(encode); + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + package GCPlugins::GCfilms::GCPluginKinopoisk; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + $self->{inside}->{$tagname}++; + if ($self->{parsingEnded}) + { + return; + } + + if ($self->{parsingList}) + { + if ($tagname eq "a") + { + if ($attr->{class} eq "all") + { + my $url = $attr->{href}; + if ($url =~ m/\/level\/1\/film/) + { + $self->{isMovie} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $url; + } + } + if ($attr->{class} eq "orange") + { + $self->{isYear} = 1; + } + } + elsif ($tagname eq "title") + { + $self->{insideHTMLtitle} = 1; + } + } + else + { + if ($attr->{class} eq "moviename-big" && $attr->{style} eq "margin: 0; padding: 0") + { + $self->{insideTitle} = 1; + } + elsif ($tagname eq "span") + { + if ($attr->{style} eq "color: #666; font-size: 13px") + { + $self->{insideOriginal} = 1; + } + elsif ($attr->{class} eq "_reachbanner_" && $self->{insideSynopsis} == 0) + { + $self->{insideSynopsis} = 1; + } + } + elsif ($tagname eq "a") + { + if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Byear\%5D/) + { + $self->{insideDate} = 1; + } + if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Bcountry\%5D/) + { + if ($self->{isCountry} >= 2) + { + $self->{insideCountry} = 1; + $self->{isCountry}++; + } + } + if ($attr->{href} =~ m/\/level\/4\/people/) + { + if ($self->{isDirector} >= 2) + { + $self->{insideDirector} = 1; + $self->{isDirector}++; + } + } + if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Bgenre\%5D/) + { + $self->{insideGenre} = 1; + $self->{isGenre}++; + } + if ($self->{insideActorList}) + { + $self->{isActors} += 1; + $self->{insideActors} = 1; + } + } + elsif ($tagname eq "td") + { + if ($attr->{class} eq "type") + { + $self->{isDirector} = 1; + $self->{isTime} = 1; + $self->{isCountry} = 1; + } + elsif ($self->{isTime} == 2) + { + $self->{insideTime} = 1; + $self->{isTime} = 0; + } + elsif ($attr->{style} eq "vertical-align: top; height: 15px" && $attr->{align} eq "right" && $self->{isActors} >= 0) + { + $self->{isActors} += 1; + $self->{insideActors} = 1; + } + } + elsif ($tagname eq "img" && $attr->{style} eq "border: none; border-left: 10px #f60 solid") + { + if ($attr->{src} ne "/images/image_none.gif") + { + $self->{curInfo}->{image} = "http://www.kinopoisk.ru".$attr->{src}; + } + } + } + } + + sub text + { + my ($self, $origtext) = @_; + return if ($self->{parsingEnded}); + if ($self->{parsingList}) + { + if (($self->{insideHTMLtitle})) + { + if ($origtext =~ m/Результаты\sпоиска/) + { + # + } + else + { + $self->{parsingEnded} = 1; + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + $self->{insideHTMLtitle} = 0; + } + if ($self->{isMovie}) + { + my ($title, $date); + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isMovie} = 0; + return; + } + elsif ($self->{isYear}) + { + $self->{itemsList}[$self->{itemIdx}]->{date} = $origtext; + $self->{isYear} = 0; + return; + } + } + else + { + if ($origtext =~ m/В\s*главных\s*ролях:/) + { + $self->{insideActorList} = 1; + } + if ($origtext =~ m/Роли\s*дублировали:/) + { + $self->{insideActorList} = 0; + } + if ($self->{insideTitle}) + { + $origtext =~ s/\s+$//; + $self->{curInfo}->{title} = $origtext; + $self->{insideTitle} = 0; + } + elsif ($self->{insideOriginal}) + { + $origtext =~ s/^\s+//; + $self->{curInfo}->{original} = $origtext; + $self->{insideOriginal} = 0; + } + elsif ($self->{insideDate}) + { + $self->{curInfo}->{date} = $origtext; + $self->{insideDate} = 0; + } + elsif ($self->{insideCountry} == 1) + { + if ($self->{isCountry} == 3) + { + $self->{curInfo}->{country} = $origtext; + } + elsif ($self->{isCountry} > 3) + { + $self->{curInfo}->{country} = $self->{curInfo}->{country}.", ".$origtext; + } + $self->{insideCountry} = 0; + } + elsif ($self->{insideDirector}) + { + if ($self->{isDirector} == 3) + { + $self->{curInfo}->{director} = $origtext; + } + elsif ($self->{isDirector} > 3) + { + $self->{curInfo}->{director} = $self->{curInfo}->{director}.", ".$origtext; + } + $self->{insideDirector} = 0; + } + elsif ($self->{insideActors}) + { + if ($self->{isActors} == 1) + { + $self->{curInfo}->{actors} = $origtext; + } + elsif ($self->{isActors} > 1) + { + if ($origtext eq "...") + { + $self->{isActors} = -1; + } + else + { + $self->{curInfo}->{actors} = $self->{curInfo}->{actors}.", ".$origtext; + } + } + $self->{insideActors} = 0; + } + elsif ($self->{insideSynopsis} == 1) + { + #$origtext =~ s/^\s+//; + $self->{curInfo}->{synopsis} = $origtext; + $self->{insideSynopsis} = 2; + } + elsif ($self->{isTime} == 1 || $self->{isDirector} == 1 || $self->{isCountry} == 1) + { + $self->{isDirector} = 0; + $self->{isTime} = 0; + $self->{isCountry} = 0; + if ($origtext eq "время") + { + $self->{isTime} = 2; + } + elsif ($origtext eq "режиссер") + { + $self->{isDirector} = 2; + } + elsif ($origtext eq "страна") + { + $self->{isCountry} = 2; + } + } + elsif ($self->{insideTime}) + { + $self->{curInfo}->{time} = $origtext; + $self->{insideTime} = 0; + } + elsif ($self->{insideGenre}) + { + if ($self->{isGenre} == 1) + { + $self->{curInfo}->{genre} = $origtext; + } + elsif ($self->{isGenre} > 1) + { + $self->{curInfo}->{genre} = $self->{curInfo}->{genre}.", ".$origtext; + } + $self->{insideGenre} = 0; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + if ($self->{parsingList}) + { + # Your code for processing search results here + } + else + { + if ($tagname eq "tr" && $self->{isDirector} >= 2) + { + $self->{isDirector} = 0; + } + elsif ($tagname eq "tr" && $self->{isGenre} != 0) + { + $self->{isGenre} = 0; + } + elsif ($tagname eq "td") + { + $self->{insideActorList} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + date => 1, + director => 0, + actors => 0, + }; + + $self->{isInfo} = 0; + $self->{isMovie} = 0; + $self->{isYear} = 0; + $self->{isDirector} = 0; + $self->{isActors} = 0; + $self->{isTime} = 0; + $self->{isGenre} = 0; + $self->{isCountry} = 0; + $self->{curName} = undef; + $self->{curUrl} = undef; + $self->{insideActorList} = 0; + return $self; + } + + sub getName + { + return "Kinopoisk"; + } + + sub getAuthor + { + return 'Nazarov Pavel'; + } + + sub getLang + { + return 'RU'; + } + + sub getCharset + { + my $self = shift; + return "windows-1251"; + } + + sub getSearchCharset + { + my $self = shift; + return "windows-1251"; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + return "http://www.kinopoisk.ru/index.php?kp_query=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url =~ /^http:/; + return "http://www.kinopoisk.ru/" . $url; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + + $html =~ s/…/\.\.\./g; + $html =~ s/\x92/'/g; + $html =~ s/\x93/“/g; + $html =~ s/\x94/”/g; + $html =~ s/—/—/g; + $html =~ s/""/'"/g; + $html =~ s/""/"'/g; + $html =~ s|
|
|; + $html =~ s/

/\x0A/g; + return $html; + } +} + +1; -- cgit v1.2.3