package GCPlugins::GCfilms::GCKinopoisk;

# Film-information plugin for www.kinopoisk.ru.
#
# The inner package GCPluginKinopoisk derives from GCfilmsPluginsBase and
# provides start/text/end handlers plus the site description getters.
# NOTE(review): the handlers are presumably invoked by the base class's
# HTML parser for every opening tag, text node and closing tag -- confirm
# against GCfilmsPluginsBase.

use strict;
use utf8;
use Encode qw(encode);

use GCPlugins::GCfilms::GCfilmsCommon;

{
    package GCPlugins::GCfilms::GCPluginKinopoisk;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Opening-tag handler.
    #
    # Parameters: $tagname (tag name), $attr (hash ref of attributes);
    # $attrseq and $origtext are accepted but not used here.
    #
    # In list mode ($self->{parsingList}) it records the URL of each result
    # link and flags that the next text node is a title or a year.
    # Otherwise it raises the "inside*" flags that tell text() which field
    # of $self->{curInfo} the next text node belongs to.
    sub start {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        return if $self->{parsingEnded};

        if ($self->{parsingList}) {
            if ($tagname eq "a") {
                if ($attr->{class} eq "all") {
                    my $url = $attr->{href};
                    # Only links to film pages open a new result entry.
                    if ($url =~ m/\/level\/1\/film/) {
                        $self->{isMovie} = 1;
                        $self->{itemIdx}++;
                        $self->{itemsList}[$self->{itemIdx}]->{url} = $url;
                    }
                }
                if ($attr->{class} eq "orange") {
                    $self->{isYear} = 1;
                }
            }
            elsif ($tagname eq "title") {
                $self->{insideHTMLtitle} = 1;
            }
        }
        else {
            # Note: this first test looks at the attributes only, whatever
            # the tag name is.
            if (   $attr->{class} eq "moviename-big"
                && $attr->{style} eq "margin: 0; padding: 0")
            {
                $self->{insideTitle} = 1;
            }
            elsif ($tagname eq "span") {
                if ($attr->{style} eq "color: #666; font-size: 13px") {
                    $self->{insideOriginal} = 1;
                }
                elsif ($attr->{class} eq "_reachbanner_"
                    && $self->{insideSynopsis} == 0)
                {
                    # insideSynopsis: 0 = not seen, 1 = capture next text,
                    # 2 = already captured (set by text()).
                    $self->{insideSynopsis} = 1;
                }
            }
            elsif ($tagname eq "a") {
                if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Byear\%5D/) {
                    $self->{insideDate} = 1;
                }
                if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Bcountry\%5D/) {
                    # isCountry >= 2 means text() has seen the country label.
                    if ($self->{isCountry} >= 2) {
                        $self->{insideCountry} = 1;
                        $self->{isCountry}++;
                    }
                }
                if ($attr->{href} =~ m/\/level\/4\/people/) {
                    # isDirector >= 2 means text() has seen the director label.
                    if ($self->{isDirector} >= 2) {
                        $self->{insideDirector} = 1;
                        $self->{isDirector}++;
                    }
                }
                if ($attr->{href} =~ m/\/level\/10\/m\_act\%5Bgenre\%5D/) {
                    $self->{insideGenre} = 1;
                    $self->{isGenre}++;
                }
                if ($self->{insideActorList}) {
                    $self->{isActors} += 1;
                    $self->{insideActors} = 1;
                }
            }
            elsif ($tagname eq "td") {
                if ($attr->{class} eq "type") {
                    # A label cell: text() decides from the label text which
                    # of the three counters moves on to 2.
                    $self->{isDirector} = 1;
                    $self->{isTime}     = 1;
                    $self->{isCountry}  = 1;
                }
                elsif ($self->{isTime} == 2) {
                    $self->{insideTime} = 1;
                    $self->{isTime}     = 0;
                }
                elsif ($attr->{style} eq "vertical-align: top; height: 15px"
                    && $attr->{align} eq "right"
                    && $self->{isActors} >= 0)
                {
                    $self->{isActors} += 1;
                    $self->{insideActors} = 1;
                }
            }
            elsif ($tagname eq "img"
                && $attr->{style} eq "border: none; border-left: 10px #f60 solid")
            {
                # Skip the site's "no image" placeholder.
                if ($attr->{src} ne "/images/image_none.gif") {
                    $self->{curInfo}->{image} =
                        "http://www.kinopoisk.ru" . $attr->{src};
                }
            }
        }
    }

    # Text-node handler.
    #
    # Parameters: $origtext (the text of the node).
    #
    # Stores the text into the field selected by the flags that start()
    # raised, then clears the flag so that each flag consumes one text node.
    sub text {
        my ($self, $origtext) = @_;

        return if ($self->{parsingEnded});

        if ($self->{parsingList}) {
            if ($self->{insideHTMLtitle}) {
                # "Результаты поиска" is Russian for "Search results".  Any
                # other page title ends list parsing and makes the loaded
                # URL the only entry of the result list.
                if ($origtext !~ m/Результаты\sпоиска/) {
                    $self->{parsingEnded} = 1;
                    $self->{itemIdx} = 0;
                    $self->{itemsList}[0]->{url} = $self->{loadedUrl};
                }
                $self->{insideHTMLtitle} = 0;
            }

            if ($self->{isMovie}) {
                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
                $self->{isMovie} = 0;
                return;
            }
            elsif ($self->{isYear}) {
                $self->{itemsList}[$self->{itemIdx}]->{date} = $origtext;
                $self->{isYear} = 0;
                return;
            }
        }
        else {
            # "В главных ролях:" ("Starring:") switches actor collection
            # on; "Роли дублировали:" ("Dubbed by:") switches it off.
            if ($origtext =~ m/В\s*главных\s*ролях:/) {
                $self->{insideActorList} = 1;
            }
            if ($origtext =~ m/Роли\s*дублировали:/) {
                $self->{insideActorList} = 0;
            }

            if ($self->{insideTitle}) {
                $origtext =~ s/\s+$//;
                $self->{curInfo}->{title} = $origtext;
                $self->{insideTitle} = 0;
            }
            elsif ($self->{insideOriginal}) {
                $origtext =~ s/^\s+//;
                $self->{curInfo}->{original} = $origtext;
                $self->{insideOriginal} = 0;
            }
            elsif ($self->{insideDate}) {
                $self->{curInfo}->{date} = $origtext;
                $self->{insideDate} = 0;
            }
            elsif ($self->{insideCountry} == 1) {
                # isCountry is 3 for the first country link and grows by one
                # for each further link (see start()).
                if ($self->{isCountry} == 3) {
                    $self->{curInfo}->{country} = $origtext;
                }
                elsif ($self->{isCountry} > 3) {
                    $self->{curInfo}->{country} .= ", " . $origtext;
                }
                $self->{insideCountry} = 0;
            }
            elsif ($self->{insideDirector}) {
                # Same counting scheme as for countries.
                if ($self->{isDirector} == 3) {
                    $self->{curInfo}->{director} = $origtext;
                }
                elsif ($self->{isDirector} > 3) {
                    $self->{curInfo}->{director} .= ", " . $origtext;
                }
                $self->{insideDirector} = 0;
            }
            elsif ($self->{insideActors}) {
                if ($self->{isActors} == 1) {
                    $self->{curInfo}->{actors} = $origtext;
                }
                elsif ($self->{isActors} > 1) {
                    if ($origtext eq "...") {
                        # -1 makes the "isActors >= 0" test on <td> in
                        # start() fail from now on.
                        $self->{isActors} = -1;
                    }
                    else {
                        $self->{curInfo}->{actors} .= ", " . $origtext;
                    }
                }
                $self->{insideActors} = 0;
            }
            elsif ($self->{insideSynopsis} == 1) {
                #$origtext =~ s/^\s+//;
                $self->{curInfo}->{synopsis} = $origtext;
                $self->{insideSynopsis} = 2;
            }
            elsif ($self->{isTime} == 1
                || $self->{isDirector} == 1
                || $self->{isCountry} == 1)
            {
                # Text of a label cell: reset all three counters, then move
                # the one matching the label on to 2.  The labels are
                # Russian for "time", "director" and "country".
                $self->{isDirector} = 0;
                $self->{isTime}     = 0;
                $self->{isCountry}  = 0;
                if ($origtext eq "время") {
                    $self->{isTime} = 2;
                }
                elsif ($origtext eq "режиссер") {
                    $self->{isDirector} = 2;
                }
                elsif ($origtext eq "страна") {
                    $self->{isCountry} = 2;
                }
            }
            elsif ($self->{insideTime}) {
                $self->{curInfo}->{time} = $origtext;
                $self->{insideTime} = 0;
            }
            elsif ($self->{insideGenre}) {
                if ($self->{isGenre} == 1) {
                    $self->{curInfo}->{genre} = $origtext;
                }
                elsif ($self->{isGenre} > 1) {
                    $self->{curInfo}->{genre} .= ", " . $origtext;
                }
                $self->{insideGenre} = 0;
            }
        }
    }

    # Closing-tag handler.
    #
    # Parameters: $tagname (tag name).
    #
    # Outside list mode, the end of a table row resets the director or the
    # genre counter, and the end of a table cell stops actor collection.
    sub end {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;

        if ($self->{parsingList}) {
            # Your code for processing search results here
        }
        else {
            if ($tagname eq "tr" && $self->{isDirector} >= 2) {
                $self->{isDirector} = 0;
            }
            elsif ($tagname eq "tr" && $self->{isGenre} != 0) {
                $self->{isGenre} = 0;
            }
            elsif ($tagname eq "td") {
                $self->{insideActorList} = 0;
            }
        }
    }

    # Constructor: builds the object through the parent class and
    # initialises the parser state used by start/text/end.
    sub new {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        # NOTE(review): presumably tells the caller which fields the search
        # result list provides -- confirm against the base class.
        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 0,
            actors   => 0,
        };

        $self->{isInfo}          = 0;
        $self->{isMovie}         = 0;
        $self->{isYear}          = 0;
        $self->{isDirector}      = 0;
        $self->{isActors}        = 0;
        $self->{isTime}          = 0;
        $self->{isGenre}         = 0;
        $self->{isCountry}       = 0;
        $self->{curName}         = undef;
        $self->{curUrl}          = undef;
        $self->{insideActorList} = 0;

        return $self;
    }

    # Returns the plugin name.
    sub getName {
        return "Kinopoisk";
    }

    # Returns the plugin author.
    sub getAuthor {
        return 'Nazarov Pavel';
    }

    # Returns the language code of the site.
    sub getLang {
        return 'RU';
    }

    # Returns the charset name used for fetched pages.
    sub getCharset {
        my $self = shift;
        return "windows-1251";
    }

    # Returns the charset name used for the search query.
    sub getSearchCharset {
        my $self = shift;
        return "windows-1251";
    }

    # Returns the search URL for $word.  $word is inserted as is.
    # NOTE(review): presumably already URL-escaped by the caller -- confirm.
    sub getSearchUrl {
        my ($self, $word) = @_;
        return "http://www.kinopoisk.ru/index.php?kp_query=$word";
    }

    # Turns a result URL into an absolute one.
    #
    # Absolute http: and https: URLs are returned unchanged.  Anything else
    # is appended to the site root; leading slashes are dropped first so
    # that a site-relative href such as "/level/1/film/..." does not yield
    # a double slash after the host name.
    sub getItemUrl {
        my ($self, $url) = @_;

        return $url if $url =~ /^https?:/;

        $url =~ s{^/+}{};
        return "http://www.kinopoisk.ru/" . $url;
    }

    # Cleans the raw page before it is parsed and resets parsingEnded.
    #
    # Parameters: $html (page content).  Returns the modified content.
    #
    # NOTE(review): as written, the "—" substitution replaces the character
    # with itself, the second '""' substitution follows one that has
    # already consumed those sequences, and the first newline substitution
    # replaces a newline with a newline.  They look like remnants of
    # patterns that once matched HTML entities or tags -- confirm against
    # the upstream plugin before relying on them.
    sub preProcess {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;

        $html =~ s/…/\.\.\./g;
        $html =~ s/\x92/'/g;
        $html =~ s/\x93/“/g;
        $html =~ s/\x94/”/g;
        $html =~ s/—/—/g;
        $html =~ s/""/'"/g;
        $html =~ s/""/"'/g;
        $html =~ s|\n|\n|;
        # Collapse each pair of consecutive newlines into one.
        $html =~ s/\n\n/\x0A/g;

        return $html;
    }
}

1;