diff options
author | Jörg Frings-Fürst <jff@merkur> | 2014-07-06 15:20:38 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <jff@merkur> | 2014-07-06 15:20:38 +0200 |
commit | 126bb8cb6b93240bb4d3a2b816b74c286c3d422b (patch) | |
tree | e66e1dfe77d53a52539489765c88d23e4423ae27 /lib/gcstar/GCPlugins/GCfilms/GCOnet.pm |
Imported Upstream version 1.7.0upstream/1.7.0
Diffstat (limited to 'lib/gcstar/GCPlugins/GCfilms/GCOnet.pm')
-rw-r--r-- | lib/gcstar/GCPlugins/GCfilms/GCOnet.pm | 327 |
1 files changed, 327 insertions, 0 deletions
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCOnet.pm b/lib/gcstar/GCPlugins/GCfilms/GCOnet.pm new file mode 100644 index 0000000..29eef60 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCfilms/GCOnet.pm @@ -0,0 +1,327 @@ +# +# More information here: http://wiki.gcstar.org/en/websites_plugins +# +# GCcollection should be replaced with the kind of collection your +# plugin deals with. e.g. GCfilms, GCgames, GCbooks,... + +# Replace SiteTemplate with your plugin name. +# The package name must exactly match the file name (.pm) +package GCPlugins::GCcollection::GCOnet; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCfilms::GCfilmsCommon; + +{ + # Replace SiteTemplate with your exporter name + # It must be the same name as the one used for file and main package name + package GCPlugins::GCfilms::GCPluginOnet; + + use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); + + # getSearchUrl + # Used to get the URL that to be used to perform searches. + # $word is the query + # Returns the full URL. 
sub getSearchUrl
{
    my ($self, $word) = @_;

    # $word is interpolated as-is; no escaping is performed here.
    return "http://film.onet.pl/filmoteka.html?S=$word";
}

# getItemUrl
# Used to get the full URL of an item page.
# Useful when URLs on results pages are relative.
# $url is the URL as found with a search.
# Returns the absolute URL. A $url that is already absolute
# (http:// or https://) is returned unchanged instead of being
# prefixed a second time; an undefined $url yields the site root.
sub getItemUrl
{
    my ($self, $url) = @_;

    $url = '' unless defined $url;
    return $url if $url =~ m{\A https?:// }xi;

    return "http://film.onet.pl/" . $url;
}

# getCharset
# Used to convert charset in web pages.
# Returns the charset as specified in pages.
sub getCharset
{
    my $self = shift;

    return "ISO-8859-2";
}

# getName
# Used to display plugin name in GUI.
# Returns the plugin name.
sub getName
{
    return "Onet";
}

# getAuthor
# Used to display the plugin author in GUI.
# Returns the plugin author name.
sub getAuthor
{
    return 'Marek Cendrowicz';
}

# getLang
# Used to fill in plugin list with user language plugins.
# Returns the language used for this site (2 letters code).
sub getLang
{
    return 'PL';
}

# getExtra
# Used if the plugin wants an extra column to be displayed in search results.
# Returns the column title, or an empty string to hide the column
# (this plugin hides it).
sub getExtra
{
    return "";
}

# changeUrl
# Can be used to change URL if item URL and the one used to
# extract information are different.
# Returns the modified URL (this plugin returns $url unchanged).
sub changeUrl
{
    my ($self, $url) = @_;

    return $url;
}

# In processing functions below, $self->{parsingList} can be used.
# If true, we are processing a search results page.
# If false, we are processing an item information page.

# $self->{inside}->{tagname} (with correct value for tagname) can be used to test
# if we are in the corresponding tag.

# You have a counter $self->{itemIdx} that has to be used when processing search results.
# It is your responsibility to increment it!
# When processing search results, you have to fill the available fields for results
#
#  $self->{itemsList}[$self->{itemIdx}]->{field_name}
#
# When processing a movie page, you need to fill the fields (if available)
# in $self->{curInfo}.
#
#  $self->{curInfo}->{field_name}

# start
# Called each time a new HTML tag begins.
# $tagname is the tag name.
# $attr is reference to an associative array of tag attributes.
# $attrseq is an array reference containing all the attributes name.
# $origtext is the tag text as found in source file
# Returns nothing
#
# On a results page, an <a> found while inside a <list_title> element opens
# a new result: itemIdx is incremented, the link's href is stored as the
# result URL and listTitle is raised so the next text chunk becomes the title.
# On an item page, the tag's class decides which one-shot flag is raised
# (itemTitle, itemDescription, itemActor); a poster/gallery <img> directly
# provides the image URL.
sub start
{
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
    $self->{inside}->{$tagname}++;

    if ($self->{parsingList})
    {
        if ($self->{inside}->{list_title} && $tagname eq 'a')
        {
            $self->{itemIdx}++;
            $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href};
            $self->{listTitle} = 1;
        }
        return;
    }

    # Most tags carry no class attribute; compare against '' rather
    # than undef so the checks below stay warning-free.
    my $class = defined $attr->{class} ? $attr->{class} : '';

    if ($class eq 'tyw')
    {
        $self->{itemTitle} = 1;
    }
    elsif ($tagname eq 'div' && $class eq 'a2')
    {
        $self->{itemDescription} = 1;
    }
    elsif ($class eq 'item_actor')
    {
        $self->{itemActor} = 1;
    }
    elsif ($tagname eq 'img' && $class eq 'pic')
    {
        my $alt = defined $attr->{alt} ? $attr->{alt} : '';
        if ($alt eq 'Galeria' || $alt eq 'Plakat')
        {
            $self->{curInfo}->{image} = "http://film.onet.pl/" . $attr->{src};
        }
    }
    return;
}

# end
# Called each time a HTML tag ends.
# $tagname is the tag name.
# Returns nothing
sub end
{
    my ($self, $tagname) = @_;

    # Clamp the nesting counter at zero. A stray closing tag would otherwise
    # leave -1 behind, which is a true value and would make later
    # $self->{inside}->{...} tests report "inside" (and make the next real
    # opening tag bring the counter back to a false 0).
    if ($self->{inside}->{$tagname} && $self->{inside}->{$tagname} > 0)
    {
        $self->{inside}->{$tagname}--;
    }
    return;
}

# text
# Called each time some plain text (between tags) is processed.
# $origtext is the read text.
sub text
{
    my ($self, $origtext) = @_;

    if ($self->{parsingList})
    {
        if ($self->{listTitle})
        {
            # First text chunk after a result link is the title.
            $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
            $self->{listTitle} = 0;
        }
        elsif ($self->{inside}->{list_date})
        {
            # The year is the trailing number after the last comma. Only
            # store it on a match, so a later non-matching chunk cannot
            # wipe a year that was already found.
            if ($origtext =~ m/,\s+(\d+)$/)
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
            }
        }
        return;
    }

    if ($self->{itemTitle})
    {
        $self->{curInfo}->{title} = $origtext;
        $self->{itemTitle} = 0;
    }
    elsif ($self->{inside}->{item_country})
    {
        # Expected shape: "Original title (Country, 1999) genre/genre".
        # Each field is stored only when its pattern matches.
        if ($origtext =~ m/(.*)\s+\(/)
        {
            $self->{curInfo}->{original} = $1;
        }
        if ($origtext =~ m/(\w+),\s+(\d+)\)/)
        {
            ($self->{curInfo}->{country}, $self->{curInfo}->{date}) = ($1, $2);
        }
        # Genres are slash-separated on the page; store them comma-separated.
        (my $genres = $origtext) =~ s|/|, |g;
        if ($genres =~ m/\)(.*)/)
        {
            $self->{curInfo}->{genre} = $1;
        }
    }
    elsif ($self->{inside}->{item_time})
    {
        if ($origtext =~ m/czas\s+(\d+).*\s+od\s+(\d+)/)
        {
            # Running time followed by a minimum age.
            ($self->{curInfo}->{time}, $self->{curInfo}->{age}) = ($1, $2);
        }
        elsif ($origtext =~ m/czas\s+(\d+)/)
        {
            # No age restriction present: still keep the running time.
            $self->{curInfo}->{time} = $1;
        }
    }
    elsif ($self->{inside}->{item_director})
    {
        $self->{curInfo}->{director} .= $origtext;
    }
    elsif ($self->{itemDescription})
    {
        $self->{curInfo}->{synopsis} = $origtext;
        $self->{itemDescription} = 0;
    }
    elsif ($self->{itemActor})
    {
        # Build a comma-separated list, one actor per flagged link.
        $self->{curInfo}->{actors} .=
            $self->{curInfo}->{actors} ? ", " . $origtext : $origtext;
        $self->{itemActor} = 0;
    }
    elsif ($self->{inside}->{item_rating})
    {
        # Site rating is out of 5; double it and round to nearest integer.
        $self->{curInfo}->{ratingpress} = int($origtext * 2 + 0.5);
    }
    return;
}

# new
# Constructor.
# Returns object reference.
sub new
{
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();

    # This member should be initialized as a reference
    # to a hash. Each key is a field that could be
    # in results with value 1 or 0 if it is returned
    # or not. For the list of keys, check the model file
    # (.gcm) and search for tags <field> in
    # /collection/options/fields/results
    $self->{hasField} = {
        title    => 1,
        date     => 1,
        director => 0,
        actors   => 0,
    };

    $self->{itemIdx}               = 0;
    $self->{itemsList}[0]->{title} = '';
    $self->{itemsList}[0]->{url}   = '';

    bless($self, $class);
    return $self;
}

# preProcess
# Called before each page is processed. You can use it to do some substitutions.
# $html is the page content.
# Returns modified version of page content.
#
# The site's markup has no usable hooks, so the interesting fragments are
# wrapped in synthetic tags (<list_title>, <list_date>, <item_country>,
# <item_time>, <item_director>, <item_rating>) or given a synthetic class
# (item_actor) that start/end/text can then recognise.
sub preProcess
{
    my ($self, $html) = @_;

    # Drop bold markup so it does not split text chunks.
    $html =~ s{<B>(.*?)</B>}{$1}gms;

    if ($self->{parsingList})
    {
        $html =~ s{<TD class=a2 width="100%">(.*?)</TD>}
                  {<list_title>$1</list_title>}gms;
        $html =~ s{<FONT class=a0 color="#993300">(.*?)</FONT>}
                  {<list_date>$1</list_date>}gms;
    }
    else
    {
        $html =~ s{<BR>}{}g;
        $html =~ s{<TD class=a2 valign=top width="100%">(.*?)<}
                  {<item_country>$1</item_country><}gms;
        $html =~ s{<SPAN class=a1>(.*?)</SPAN>}
                  {<item_time>$1</item_time>}gms;
        # "." stands in for the non-ASCII letter of the Polish labels.
        $html =~ s{Re.yseria: (.*?)Scenariusz}
                  {<item_director>$1</item_director>}gms;
        $html =~ s{Re.yseria: (.*?)wi.cej}
                  {<item_director>$1</item_director>}gms;
        $html =~ s{a2><A class=u}
                  {a2><A class=item_actor}gms;
        # Non-greedy on purpose: with /s a greedy ".*" would run to the LAST
        # "x.y/5" in the document and delete everything in between.
        $html =~ s{Ocena filmu.*?([0-9]\.[0-9]+)/5}
                  {<item_rating>$1</item_rating>}gms;
    }
    return $html;
}

}

1;