diff options
author | Jörg Frings-Fürst <jff@merkur> | 2014-07-06 15:20:38 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <jff@merkur> | 2014-07-06 15:20:38 +0200 |
commit | 126bb8cb6b93240bb4d3a2b816b74c286c3d422b (patch) | |
tree | e66e1dfe77d53a52539489765c88d23e4423ae27 /lib/gcstar/GCPlugins/GCPluginsBase.pm |
Imported Upstream version 1.7.0 (upstream/1.7.0)
Diffstat (limited to 'lib/gcstar/GCPlugins/GCPluginsBase.pm')
-rw-r--r-- | lib/gcstar/GCPlugins/GCPluginsBase.pm | 396 |
1 files changed, 396 insertions, 0 deletions
package GCPlugins::GCPluginsBase;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

{
    # Base class for plugins that fetch a web page and parse it.
    #
    # It derives from HTML::Parser and performs all HTTP traffic through
    # the user agent exported by LWP::Simple ($ua). Several methods used
    # below are not defined in this package (getSearchUrl, preProcess,
    # getItemUrl, getName, init); they are presumably supplied by the
    # concrete plugin classes - verify against the plugins themselves.
    package GCPluginParser;
    use base qw(HTML::Parser);
    use LWP::Simple qw($ua);
    use HTTP::Cookies::Netscape;
    use URI::Escape;
    use HTML::Entities;
    use Encode;
    use File::Spec;


    # Constructor. Configures the LWP::Simple user agent (agent string and
    # default headers) and resets the result list.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();

        # $ua is the single agent exported by LWP::Simple, so these
        # settings apply to every parser created in this process.
        $ua->agent('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.5) Gecko/20041111 Firefox/1.0');
        $ua->default_header('Accept-Encoding' => 'x-gzip');
        $ua->default_header('Accept' => 'text/html');
        $self->{ua} = $ua;

        $self->{itemIdx} = -1;
        # Must be an array reference: assigning () to a scalar stores undef,
        # which getItems cannot dereference under strict refs.
        $self->{itemsList} = [];

        bless ($self, $class);
        return $self;
    }

    # Number of items found so far (itemIdx is the index of the last one).
    sub getItemsNumber
    {
        my ($self) = @_;

        return $self->{itemIdx} + 1;
    }

    # Returns the list of items collected by the last search. An empty
    # list is returned when nothing has been collected.
    sub getItems
    {
        my ($self) = @_;

        return @{ $self->{itemsList} || [] };
    }

    # Runs one search pass: fetches the search page (or nextUrl when a
    # previous pass set it), parses it in "list" mode and converts the
    # character set of every collected field except 'url' and the fields
    # named by getNotConverted.
    sub load
    {
        my $self = shift;

        $self->checkProxy;
        $self->checkCookieJar;

        $self->{itemIdx} = -1;
        $self->{isInfo} = 0;
        $self->{itemsList} = [];

        #my $word = uri_escape_utf8($self->{title});
        my $title2 = encode($self->getSearchCharset, $self->{title});
        my $word = uri_escape($title2);
        $word =~ s/%20/+/g;

        my $html;

        # For multi-pass plugins, the plugin will have set the url to load for
        # the next pass as nextUrl. If this doesn't exist, we're either on the
        # first pass, or only using a one-pass plugin, so call getSearchUrl
        # to find the url to retrieve
        if ($self->{nextUrl})
        {
            $html = $self->loadPage($self->{nextUrl});
        }
        else
        {
            $html = $self->loadPage($self->getSearchUrl($word));
        }

        $self->{parsingList} = 1;
        $html = $self->preProcess($html);
        decode_entities($html)
            if $self->decodeEntitiesWanted;
        $self->{inside} = undef;
        $self->parse($html);

        my @noConversion = @{$self->getNotConverted};
        foreach my $item (@{$self->{itemsList}})
        {
            foreach my $field (keys %{$item})
            {
                next if $field eq 'url';
                $item->{$field} = $self->convertCharset($item->{$field})
                    if ! GCUtils::inArrayTest($field, @noConversion);
            }
        }

    }

    # Fetches $url and returns the page content.
    #
    #   $url    - address to retrieve
    #   $post   - when true, passed to $ua->post as the request content;
    #             otherwise a GET is issued
    #   $noSave - when true, $url is not recorded in $self->{loadedUrl}
    #
    # The environment variable GCS_DEBUG_PLUGIN_PHASE drives a debugging
    # aid: with 1 the downloaded page is also written to a file in the
    # temporary directory, with 2 nothing is downloaded and the page is
    # read back from that file.
    #
    # Returns the decoded content when available, otherwise the raw content.
    sub loadPage
    {
        my ($self, $url, $post, $noSave) = @_;

        my $debugPhase = $ENV{GCS_DEBUG_PLUGIN_PHASE} || 0;
        my $debugFile;
        $debugFile = File::Spec->tmpdir.'/'.GCUtils::getSafeFileName($url)
            if ($debugPhase > 0);
        $self->{loadedUrl} = $url if ! $noSave;

        my $response;
        my $result;
        if ($debugPhase != 2)
        {
            if ($post)
            {
                $response = $ua->post($url, $post);
            }
            else
            {
                $response = $ua->get($url);
            }

            #UnclePetros 03/07/2011:
            #code to handle correctly 302 response messages
            if ($response->code == 302)
            {
                my $location = $response->header("location");
                # A redirect without a target cannot be followed; keep the
                # original response in that case.
                if (defined $location && $location ne '')
                {
                    $response = $ua->get($location);
                    $self->{loadedUrl} = $location;
                }
            }

            # decoded_content may die (e.g. unknown encoding); the raw
            # content is used as a fallback below.
            eval {
                $result = $response->decoded_content;
            };
            if ($debugPhase == 1)
            {
                # Debugging aid only: report problems, never abort the load.
                if (open my $debugHandle, '>', $debugFile)
                {
                    print {$debugHandle} ($result || $response->content);
                    close $debugHandle
                        or warn "Cannot write debug file $debugFile: $!\n";
                }
                else
                {
                    warn "Cannot open debug file $debugFile for writing: $!\n";
                }
            }
        }
        else
        {
            if (open my $debugHandle, '<', $debugFile)
            {
                local $/;
                $result = <$debugHandle>;
                close $debugHandle;
                utf8::decode($result);
            }
            else
            {
                warn "Cannot open debug file $debugFile for reading: $!\n";
            }
        }
        return $result || ($response && $response->content);
    }

    # Lower-cases $msg, then upper-cases the first letter of every word of
    # at least two characters that starts the string or follows whitespace
    # or a comma.
    sub capWord
    {
        my ($self, $msg) = @_;

        use locale;

        (my $newmsg = lc $msg) =~ s/(\s|,|^)(\w)(\w)(\w*?)/$1\U$2\E$3$4/gi;
        return $newmsg;
    }

    # Reference to the list of fields usable for searching.
    sub getSearchFieldsArray
    {
        return [''];
    }

    # Comma separated labels of the search fields, as displayed by $model.
    sub getSearchFields
    {
        my ($self, $model) = @_;

        return join(', ',
                    map { $model->getDisplayedLabel($_) }
                        @{$self->getSearchFieldsArray});
    }

    # Value stored for $field in the hasField table of this parser.
    sub hasField
    {
        my ($self, $field) = @_;

        return $self->{hasField}->{$field};
    }

    sub getExtra
    {
        return '';
    }

    # Character set for web page text
    sub getCharset
    {
        my $self = shift;

        return "ISO-8859-1";
    }

    # Character set for encoding search term, can sometimes be different
    # to the page encoding, but we default to the same as the page set
    sub getSearchCharset
    {
        my $self = shift;

        # Call through the object so that a getCharset overridden by a
        # plugin is used; a plain function call would always reach the
        # implementation of this package.
        return defined $self ? $self->getCharset : getCharset();
    }

    # For some plugins, we need extra checks to determine if urls match
    # the language the plugin is written for. This allows us to correctly determine
    # if a drag and dropped url is handled by a particular plugin. If these
    # checks are necessary, return 1, and make sure plugin handles the
    # the testURL function correctly
    sub needsLanguageTest
    {
        return 0;
    }

    # Used to test if a given url is handled by the plugin. Only required if
    # needsLanguageTest is true.
    sub testURL
    {
        my ($self, $url) = @_;
        return 1;
    }

    # Determines whether plugin should be the default plugins gcstar uses.
    # Plugins with this attribute set will appear first in plugin list,
    # and will be highlighted with a star icon. A returned value of 1
    # means the plugin is preferred if its language matches the user's language,
    # a returned value of 2 means it is preferred regardless of the language.
    sub isPreferred
    {
        return 0;
    }

    # Same value as isPreferred, resolved on the actual plugin object so
    # that an overridden isPreferred is taken into account.
    sub getPreferred
    {
        my $self = shift;

        return defined $self ? $self->isPreferred : isPreferred();
    }

    # Reference to the list of fields that must not go through
    # convertCharset.
    sub getNotConverted
    {
        my $self = shift;
        return [];
    }

    # Whether HTML entities are decoded before the page is parsed.
    sub decodeEntitiesWanted
    {
        return 1;
    }

    sub getDefaultPictureSuffix
    {
        return '';
    }

    # Decodes $value from the plugin character set (getCharset).
    #
    # A plain scalar is decoded and returned. An array reference is
    # processed in place (each row may be an array reference of cells or
    # a plain string) and the same reference is returned. A value that
    # cannot be decoded is left unchanged.
    sub convertCharset
    {
        my ($self, $value) = @_;

        my $charset = $self->getCharset;
        # Decoding must never abort a whole import, so failures fall back
        # to the undecoded text.
        my $decodeOne = sub {
            my $text = shift;
            my $decoded = $text;
            eval {
                $decoded = decode($charset, $text);
            };
            return $decoded;
        };

        if (ref($value) eq 'ARRAY')
        {
            foreach my $line (@{$value})
            {
                if (ref($line) eq 'ARRAY')
                {
                    # Loop variables alias the elements: assignment
                    # updates the caller's structure.
                    $_ = $decodeOne->($_) foreach (@{$line});
                }
                elsif (! ref($line))
                {
                    $line = $decodeOne->($line);
                }
            }
            return $value;
        }

        return $decodeOne->($value);
    }

    # Loads the details of the item selected through $self->{wantedIdx}
    # and returns the resulting hash reference.
    sub getItemInfo
    {
        my $self = shift;

        # init is optional; any failure in it is deliberately ignored.
        eval {
            $self->init;
        };
        my $idx = $self->{wantedIdx};
        my $url = $self->getItemUrl($self->{itemsList}[$idx]->{url});
        $self->loadUrl($url);
        return $self->{curInfo};
    }

    # Hook giving a plugin the chance to rewrite an item url before it
    # is fetched. The default is to keep it as is.
    sub changeUrl
    {
        my ($self, $url) = @_;

        return $url;
    }

    # Fetches the page of one item, parses it in "item" mode and returns
    # the collected information as a hash reference ($self->{curInfo}).
    # Scalar fields are cleaned up: '|' becomes ',', carriage returns and
    # trailing blanks on each line are removed.
    sub loadUrl
    {
        my ($self, $url) = @_;
        $self->checkProxy;
        $self->checkCookieJar;
        my $realUrl = $self->changeUrl($url);
        my $html = $self->loadPage($realUrl);
        $self->{parsingList} = 0;
        #$html = $self->convertCharset($html);
        $self->{curInfo} = {};

        $html = $self->preProcess($html);
        decode_entities($html)
            if $self->decodeEntitiesWanted;

        # The url given by the caller is stored, not the rewritten one.
        $self->{curInfo}->{$self->{urlField}} = $url;
        $self->{inside} = undef;
        $self->parse($html);

        my @noConversion = @{$self->getNotConverted};
        foreach my $field (keys %{$self->{curInfo}})
        {
            next if $field eq $self->{urlField};
            $self->{curInfo}->{$field} = $self->convertCharset($self->{curInfo}->{$field})
                if ! GCUtils::inArrayTest($field, @noConversion);
            if (ref($self->{curInfo}->{$field}) ne 'ARRAY')
            {
                $self->{curInfo}->{$field} =~ s/\|/,/gm;
                $self->{curInfo}->{$field} =~ s/\r//gm;
                $self->{curInfo}->{$field} =~ s/[ \t]*$//gm;
            }
        }
        $self->{curInfo}->{$self->{urlField}} .= $GCModel::linkNameSeparator.$self->getName;
        return $self->{curInfo};
    }

    # Remembers the proxy to use; it is applied by checkProxy.
    sub setProxy
    {
        my ($self, $proxy) = @_;

        $self->{proxy} = $proxy;
    }

    # Applies the stored proxy to the user agent for the http scheme.
    sub checkProxy
    {
        my $self = shift;
        $ua->proxy(['http'], $self->{proxy});
        #$self->{ua}->proxy(['http'], $self->{proxy});
    }

    # Remembers the cookie file to use; it is applied by checkCookieJar.
    sub setCookieJar
    {
        my ($self, $cookieJar) = @_;
        $self->{cookieJar} = $cookieJar;
    }

    # Attaches a Netscape format cookie jar, backed by the stored file and
    # saved automatically, to the user agent.
    sub checkCookieJar
    {
        my $self = shift;
        $ua->cookie_jar(HTTP::Cookies::Netscape->new(
            'file' => "$self->{cookieJar}",
            'autosave' => 1,));
    }

    # Used to set the number of passes the plugin requires
    sub getNumberPasses
    {
        # Most plugins only need to search once, so default to one pass
        return 1;
    }

    # Returns undef if it doesn't support search using barcode scanner
    sub getEanField
    {
        return undef;
    }

}

1;