package GCPlugins::GCboardgames::GCtrictrac;
###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################
use strict;
use GCPlugins::GCboardgames::GCboardgamesCommon;
{
package GCPlugins::GCboardgames::GCPlugintrictrac;
use base qw(GCPlugins::GCboardgames::GCboardgamesPluginsBase);
# Tag-open handler: updates the parser state machine kept in $self.
# (The signature matches an HTML::Parser-style "start" callback; presumably
# the base class's parser invokes it - confirm against the base class.)
#
# Parameters:
#   $self     - plugin instance; state flags live directly in its hash
#   $tagname  - name of the tag being opened
#   $attr     - hash ref of the tag's attributes
#   $attrseq, $origtext - accepted for the callback signature, unused here
#
# While $self->{parsingList} is set, the page is treated as a search results
# page and entries are collected into $self->{itemsList}. Otherwise the page
# is treated as an item page and the handler raises "inside..." flags that
# the text handler consumes.
sub start
{
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

    $self->{inside}->{$tagname}++;

    return if $self->{parsingEnded};

    if ($self->{parsingList})
    {
        # An exact match can take us straight to an item page instead of a
        # results page. Detect that through the 20px heading, which is the
        # same element used for the game name when parsing an item page.
        if (($tagname eq "font") && ($attr->{style} =~ /FONT-SIZE: 20px/))
        {
            # Stop parsing results; the loaded page itself is the only item
            $self->{parsingEnded} = 1;
            $self->{itemIdx} = 0;
            $self->{itemsList}[0]->{url} = $self->{loadedUrl};
        }

        # Everything needed for a result (url, title, year) is carried by
        # the <a> tag itself: href is the url, and the onmouseover overlib
        # text holds the title in bold followed by the year in parentheses.
        # TODO - check how search results look when they do not have an image??
        if (($tagname eq "a") && ($attr->{href} ne "#") && ($attr->{onmouseover} =~ /^(return overlib)/))
        {
            # Add to search results
            $self->{itemIdx}++;
            my $item = ($self->{itemsList}[$self->{itemIdx}] ||= {});
            $item->{url} = $attr->{href};

            my $mouseoverText = $attr->{onmouseover};

            # Anchor on the opening <b> and stop at the first </b>. An
            # unanchored greedy capture would also swallow the leading
            # "return overlib(..." text that is guaranteed to be present.
            if ($mouseoverText =~ /<b>(.+?)<\/b>/)
            {
                $item->{name} = $1;
            }
            if ($mouseoverText =~ /<\/b> \((\d+)\)/)
            {
                $item->{released} = $1;
            }
        }
        return;
    }

    # Item page: look for tags matching certain criteria and arm the
    # matching flag so the text handler grabs the text inside them.
    if (($tagname eq "font") && ($attr->{style} =~ /FONT-SIZE: 20px/))
    {
        $self->{insideName} = 1;
    }
    elsif (($tagname eq "font") && ($attr->{style} =~ /FONT-SIZE: 12px/))
    {
        # The text handler arms a "nextIs..." flag when it sees a label;
        # the following 12px font tag holds the corresponding value.
        if ($self->{nextIsPlayers})
        {
            $self->{insidePlayers} = 1;
            $self->{nextIsPlayers} = 0;
        }
        if ($self->{nextIsAges})
        {
            $self->{insideAges} = 1;
            $self->{nextIsAges} = 0;
        }
        if ($self->{nextIsPlayingTime})
        {
            $self->{insidePlayingTime} = 1;
            $self->{nextIsPlayingTime} = 0;
        }
    }
    elsif (($tagname eq "td") && ($attr->{height} eq "250") && ($attr->{width} eq "250"))
    {
        # The 250x250 cell is where the box picture is looked for
        $self->{insideImage} = 1;
    }
    elsif ($tagname eq "img")
    {
        if ($self->{insideImage})
        {
            # Keep only the first picture found
            $self->{curInfo}->{boxpic} = "http://trictrac.net".$attr->{src}
                unless $self->{curInfo}->{boxpic};
            $self->{insideImage} = 0;
        }
    }
    elsif ($tagname eq "a")
    {
        # Single-valued fields consume their "nextIs..." flag; row-based
        # fields (designer, illustrator, mechanic, category) stay armed
        # until the end handler sees the row close, so every link in the
        # row is collected.
        if ($self->{nextIsYear})
        {
            $self->{insideYear} = 1;
            $self->{nextIsYear} = 0;
        }
        if ($self->{insideDesignerRow})
        {
            $self->{insideDesigner} = 1;
        }
        if ($self->{insideIllustratorRow})
        {
            $self->{insideIllustrator} = 1;
        }
        if ($self->{nextIsPublishers})
        {
            $self->{insidePublishers} = 1;
            $self->{nextIsPublishers} = 0;
        }
        if ($self->{insideMechanicRow})
        {
            $self->{insideMechanic} = 1;
        }
        if ($self->{insideCategoryRow})
        {
            $self->{insideCategory} = 1;
        }
    }
    elsif ($tagname eq "b")
    {
        if ($self->{insideExpansionList})
        {
            $self->{insideExpansion} = 1;
        }
    }
    elsif (($tagname eq "p") && ($attr->{style} =~ /TEXT-ALIGN: justify/))
    {
        $self->{insideDescription} = 1;
    }

    if ($self->{insideDescription})
    {
        if ($tagname eq "br")
        {
            # neatens up the description a little by starting a new line on br tags
            $self->{curInfo}->{description} .= "\n";
        }
        elsif ($tagname eq "li")
        {
            # basic formatting of lists
            $self->{curInfo}->{description} .= " - ";
        }
    }
}
# Tag-close handler: winds down the state flags raised while the tag was open.
#
# Parameters:
#   $self    - plugin instance holding the parsing state
#   $tagname - name of the tag being closed
#
# Closing a table row ends the designer / illustrator / mechanic / category
# rows, closing a table ends the expansion list, closing a bold tag ends the
# "expansion for" text, and closing a cell ends the description.
sub end
{
    my ($self, $tagname) = @_;

    $self->{inside}->{$tagname}--;

    if ($tagname eq "tr")
    {
        # Row flag => field whose trailing ", " separator must be dropped
        my @listRows = (
            [ 'insideDesignerRow',    'designedby' ],
            [ 'insideIllustratorRow', 'illustratedby' ],
        );
        foreach my $row (@listRows)
        {
            my ($flag, $field) = @{$row};
            next unless $self->{$flag};
            # Use regex to strip final ", " off end of line
            $self->{curInfo}->{$field} =~ s/(, )$//;
            $self->{$flag} = 0;
        }

        # These rows have no clean-up of their own, only the flag to drop
        foreach my $flag ('insideMechanicRow', 'insideCategoryRow')
        {
            $self->{$flag} = 0 if $self->{$flag};
        }
        return;
    }

    if ($tagname eq "table")
    {
        $self->{insideExpansionList} = 0 if $self->{insideExpansionList};
        return;
    }

    if ($tagname eq "b")
    {
        return unless $self->{insideExpands};
        # Drop the double quotes from the name of the expanded game
        $self->{curInfo}->{expansionfor} =~ s/"//g;
        $self->{insideExpands} = 0;
        return;
    }

    if (($tagname eq "td") && ($self->{insideDescription}))
    {
        $self->{insideDescription} = 0;
        # remove spaces from start and end of description
        $self->{curInfo}->{description} =~ s/^\s+//;
        $self->{curInfo}->{description} =~ s/\s+$//;
    }
    return;
}
# Text handler: stores the text chunk in whichever field the tag handlers
# have armed, then looks for labels announcing the next field.
#
# Parameters:
#   $self     - plugin instance holding the parsing state and curInfo
#   $origtext - text chunk as handed over by the parser
#
# Chunks shorter than two characters are ignored. Nothing is extracted while
# the search results list is being parsed, or once parsing has ended.
sub text
{
    my ($self, $origtext) = @_;

    return if (length($origtext) < 2);

    # Entity clean-up. As found, these substitutions replaced a character by
    # itself and so did nothing; the patterns below decode the entities that
    # the replacement characters stand for.
    # NOTE(review): entity spellings are reconstructed - confirm against the
    # pages actually served.
    $origtext =~ s/&(?:#34|quot);/"/g;
    $origtext =~ s/&#179;|³/3/g;
    $origtext =~ s/\n//g;
    # Strip an indentation run of two or more whitespace characters. The
    # quantifier was missing its closing brace, so no leading whitespace was
    # ever removed and the ^-anchored label checks below could not match
    # indented text.
    $origtext =~ s/^\s{2,}//;
    # French accents substitution. Replacements are written as escapes, like
    # the \xe9 used in the label detection below, so the result does not
    # depend on the encoding of this file. All occurrences are decoded.
    $origtext =~ s/&agrave;/\xe0/g;
    $origtext =~ s/&eacute;/\xe9/g;

    return if ($self->{parsingEnded});
    # Search results are handled entirely by the tag handler
    return if ($self->{parsingList});

    # fetching information from page
    if ($origtext =~ /^Nom VO/)
    {
        $self->{curInfo}->{original} = $origtext;
        $self->{curInfo}->{original} =~ s/Nom VO : //;
    }

    # At most one armed field consumes this chunk of text
    if ($self->{insideName})
    {
        $self->{curInfo}->{name} = $origtext;
        $self->{insideName} = 0;
    }
    elsif ($self->{insideYear})
    {
        # Keep the digits only
        $self->{curInfo}->{released} = $origtext;
        $self->{curInfo}->{released} =~ s/([^0-9])//g;
        $self->{insideYear} = 0;
    }
    elsif ($self->{insideDesigner})
    {
        # Append text (and trailing ,) to existing designer field
        $self->{curInfo}->{designedby} .= $origtext.", ";
        $self->{insideDesigner} = 0;
    }
    elsif ($self->{insideIllustrator})
    {
        # Append text (and trailing ,) to existing illustrator field
        $self->{curInfo}->{illustratedby} .= $origtext.", ";
        $self->{insideIllustrator} = 0;
    }
    elsif ($self->{insidePublishers})
    {
        $self->{curInfo}->{publishedby} = $origtext;
        $self->{insidePublishers} = 0;
    }
    elsif ($self->{insidePlayers})
    {
        $self->{curInfo}->{players} = $origtext;
        $self->{insidePlayers} = 0;
    }
    elsif ($self->{insideAges})
    {
        $self->{curInfo}->{suggestedage} = $origtext;
        $self->{insideAges} = 0;
    }
    elsif ($self->{insidePlayingTime})
    {
        $self->{curInfo}->{playingtime} = $origtext;
        $self->{insidePlayingTime} = 0;
    }
    elsif ($self->{insideExpands})
    {
        # Accumulates until the end handler sees the closing bold tag
        $self->{curInfo}->{expansionfor} .= $origtext;
    }
    elsif ($self->{insideExpansion})
    {
        $self->{curInfo}->{expandedby} .= $self->capWord($origtext).',';
        $self->{insideExpansion} = 0;
    }
    elsif ($self->{insideDescription})
    {
        # Accumulates until the end handler sees the closing cell
        $self->{curInfo}->{description} .= $origtext;
    }
    elsif ($self->{insideMechanic})
    {
        $self->{curInfo}->{mechanics} .= $self->capWord($origtext).',';
        $self->{insideMechanic} = 0;
    }
    elsif ($self->{insideCategory})
    {
        $self->{curInfo}->{category} .= $self->capWord($origtext).',';
        $self->{insideCategory} = 0;
    }

    # Pre-detection based on text (not tags) for various fields
    # that have no specific id in tags
    if ($origtext =~ /^Ann\xe9e/)
    {
        $self->{nextIsYear} = 1;
    }
    if ($origtext =~ /^Auteur/)
    {
        $self->{insideDesignerRow} = 1;
    }
    if ($origtext =~ /^Illustrateur/)
    {
        $self->{insideIllustratorRow} = 1;
    }
    if ($origtext =~ /^Editeur/)
    {
        $self->{nextIsPublishers} = 1;
    }
    if ($origtext =~ /^Joueurs/)
    {
        $self->{nextIsPlayers} = 1;
    }
    if ($origtext =~ /^Age/)
    {
        $self->{nextIsAges} = 1;
    }
    if ($origtext =~ /^Dur/)
    {
        $self->{nextIsPlayingTime} = 1;
    }
    if ($origtext =~ /^Ceci est une extension pour/)
    {
        $self->{insideExpands} = 1;
    }
    if ($origtext =~ /canisme\(s\)/)
    {
        $self->{insideMechanicRow} = 1;
    }
    if ($origtext =~ /Th.{1,8}me\(s\)/)
    {
        $self->{insideCategoryRow} = 1;
    }
    if ($origtext =~ /^Les extensions/)
    {
        $self->{insideExpansionList} = 1;
    }
}
# Constructor. Builds the object through the parent class, then sets the
# plugin-specific defaults.
#
# Can be called on a class name or on an existing instance, in which case
# the instance's class is used.
sub new
{
    my ($proto) = @_;
    my $class = ref($proto) ? ref($proto) : $proto;

    my $self = bless($class->SUPER::new(), $class);

    # Fields flagged in hasField - presumably the ones available for the
    # search results list; confirm against the base class.
    $self->{hasField} = { map { $_ => 1 } qw(name released) };

    @{$self}{qw(isBoardgame curName curUrl)} = (0, undef, undef);

    return $self;
}
# Cleans up the raw page before it is parsed, and resets the parsingEnded
# flag for the new page.
#
# Parameters:
#   $self - plugin instance
#   $html - page content
#
# Returns the cleaned-up page content.
#
# NOTE(review): several patterns here had lost their HTML entity and were
# left empty or as a bare line break. An empty pattern makes Perl reuse the
# last successful pattern (or match at every position), and a bare line
# break pattern turned every newline into "...". The entities below are
# reconstructed from the neighbouring substitutions (146 = 0x92,
# 133 = 0x85, 149 = 0x95) - confirm against upstream.
sub preProcess
{
    my ($self, $html) = @_;

    $self->{parsingEnded} = 0;

    # Attribute values wrapped in escaped double quotes: switch the
    # attribute delimiters to single quotes so the inner quotes can be
    # literal. Matching a plain "" instead would corrupt every empty
    # attribute such as alt="".
    $html =~ s/"&(?:#34|quot);/'"/g;
    $html =~ s/&(?:#34|quot);"/"'/g;

    # NOTE(review): as found, this substitution replaces the first line
    # break with a line break, i.e. does nothing. Its pattern has most
    # likely lost some markup; kept unchanged until the intended pattern is
    # confirmed.
    $html =~ s|\n|\n|;

    # Windows-1252 punctuation, as raw characters and as numeric entities
    $html =~ s|\x{92}|'|gi;
    $html =~ s|&#146;|'|gi;
    $html =~ s|&#149;|*|gi;
    $html =~ s|&#133;|...|gi;
    $html =~ s|\x{85}|...|gi;
    $html =~ s|\x{8C}|OE|gi;
    $html =~ s|\x{9C}|oe|gi;

    return $html;
}
# Builds the URL of the search page for the given search term.
#
# Parameters:
#   $self - plugin instance (unused)
#   $word - search term; appended to the query string as-is, no escaping
#           is done here
#
# Returns the search URL as a string.
sub getSearchUrl
{
    my ($self, $word) = @_;

    my $searchPrefix = "http://trictrac.net/index.php3?id=jeux&rub=ludotheque&inf=cat&choix=";
    return $searchPrefix.$word;
}
# Turns a URL taken from the search results into an absolute item URL.
#
# Parameters:
#   $self - plugin instance (unused)
#   $url  - absolute URL, or a path relative to the site root, with or
#           without its leading slash
#
# Returns the absolute URL as a string.
sub getItemUrl
{
    my ($self, $url) = @_;

    # Already absolute: leave untouched. Both schemes are accepted, so an
    # https:// link is no longer prefixed with the site root.
    return $url if $url =~ /^https?:/;

    # Make sure exactly one slash separates the host from the path
    my $separator = ($url =~ /^\//) ? "" : "/";
    return "http://trictrac.net".$separator.$url;
}
# Returns the fixed display name of this plugin's source site.
sub getName
{
    my $siteName = "Tric Trac";
    return $siteName;
}
# Returns the fixed author string recorded for this plugin.
sub getAuthor
{
    my $pluginAuthor = 'Florent';
    return $pluginAuthor;
}
# Returns the fixed language code recorded for this plugin.
sub getLang
{
    my $languageCode = 'FR';
    return $languageCode;
}
}
1;