#!/usr/bin/perl
# $Id: focus-news.pl,v 1.1 2005/12/06 15:41:34 root Exp root $
# #############################################################################

use strict;
use Data::Dumper;
use LWP::Simple;
use HTML::TokeParser::Simple;

# Category name => URL of that category's listing page on focus-news.net.
# Some entries use the "www." host and some do not; they are kept exactly as
# they were originally recorded.
my $urls = {
    Institutions => 'http://www.focus-news.net/index.php?catid=68&ch=11',
    Politics     => 'http://focus-news.net/index.php?catid=80&ch=5',
    Economics    => 'http://focus-news.net/index.php?catid=86&ch=10',
    Finance      => 'http://focus-news.net/index.php?catid=97&ch=6',
    Society      => 'http://focus-news.net/index.php?catid=104&ch=8',
    Security     => 'http://focus-news.net/index.php?catid=110&ch=9',
    Police       => 'http://www.focus-news.net/index.php?catid=120&ch=0',
    Health       => 'http://focus-news.net/index.php?catid=121&ch=0',
    World        => 'http://focus-news.net/index.php?catid=134&ch=6',
};

# The category to dump is taken from the first command-line argument.  With
# no argument the script behaves as before and dumps the 'World' category.
my $category = @ARGV ? $ARGV[0] : 'World';

# Fail early with the list of valid names instead of fetching an undef URL.
die "Unknown category '$category'; expected one of: "
  . join(', ', sort keys %{$urls}) . "\n"
  unless exists $urls->{$category};

print Dumper(feed($urls->{$category}));

###############################################################################
# feed($url)
#
# Fetches the page at $url and walks its HTML tokens, collecting news entries.
#
# The scan is a small state machine:
#   * a token accepted by is_date() announces that the next text token is the
#     date, which is then remembered for all following entries;
#   * a token accepted by is_news_link() supplies the entry's href;
#   * the next text token seen while both an href and a date are known is the
#     entry's title (leading/trailing whitespace removed);
#   * the first following token that is none of the above (in practice the
#     tag after the title text) completes the entry.
#
# Returns an array reference of hash references, each with the keys
# 'title', 'href' and 'date'.  If the page cannot be fetched a warning is
# emitted and a reference to an empty array is returned.
sub feed
{
    my $url = shift;

    my $items = [];

    # LWP::Simple::get() returns undef when the request fails.  Handing that
    # to the parser would only lead to an obscure failure later on, so report
    # the problem here and give the caller an empty result.
    my $content = get($url);
    if (!defined $content) {
        warn "feed: unable to fetch $url\n";
        return $items;
    }

    my $parser = HTML::TokeParser::Simple->new(\$content);

    my $href    = "";
    my $title   = "";
    my $date    = "";
    my $is_date = 0;    # true while the next text token is expected to be a date

    while (my $token = $parser->get_token) {
        next if $token->is_comment;

        if (is_date($token)) {
            $is_date = 1;
            next;
        }

        if ($is_date && $token->is_text) {
            $date    = $token->as_is;
            $is_date = 0;
            next;
        }

        if (is_news_link($token)) {
            $href = $token->get_attr('href');
            $href = "" unless defined $href;

            # Drop the PHP session id from the query string.  Only the
            # separator in front of the parameter is removed, so a parameter
            # that follows it keeps its own '&'.
            $href =~ s/&PHPSESSID=[^&]*//i;

            # The session id may also be the first parameter, directly after
            # the '?'.  Keep the '?' for any remaining parameters and drop it
            # when nothing is left.
            if ($href =~ s/\?PHPSESSID=[^&]*&?/?/i) {
                $href =~ s/\?\z//;
            }
            next;
        }

        if ($href && $token->is_text && $date) {
            $title = $token->as_is;
            $title =~ s/\s+$//;
            $title =~ s/^\s+//;
            next;
        }

        if ($title && $href) {
            # Hrefs are normally relative to the site root; leave one that
            # is already absolute untouched instead of prefixing it again.
            my $link =
              $href =~ m{\Ahttps?://}i
              ? $href
              : "http://www.focus-news.net/$href";

            push @{$items},
              {
                title => $title,
                href  => $link,
                date  => $date,
              };

            # Reset for the next entry; the date is deliberately kept, as it
            # stays in effect until the next date marker is seen.
            $title = "";
            $href  = "";
        }
    }

    return $items;
}

# is_news_link($token)
#
# Returns 1 when $token is an <a> start tag whose class attribute is exactly
# 'ListCatsNews' and which carries true onMouseOut and onMouseOver
# attributes; returns 0 otherwise.
sub is_news_link
{
    my $token = shift;

    # Only start tags carry attributes, so rule out every other token before
    # looking at any attribute.
    return 0 unless $token->is_start_tag('a');

    # The class attribute may be absent; check definedness so that undef is
    # never used in a string comparison.
    my $class = $token->get_attr('class');
    return 0 unless defined $class && $class eq 'ListCatsNews';

    my $has_hover_handlers =
      $token->get_attr('onMouseOut') && $token->get_attr('onMouseOver');

    return $has_hover_handlers ? 1 : 0;
}

# is_date($token)
#
# Returns 1 when $token is a <span> start tag whose class attribute is
# exactly 'ListCatsDateSource'; returns 0 otherwise.
sub is_date
{
    my $token = shift;

    return 0 unless $token->is_start_tag('span');

    # A <span> without a class attribute yields undef here; check
    # definedness so that undef is never used in a string comparison.
    my $class = $token->get_attr('class');

    return (defined $class && $class eq 'ListCatsDateSource') ? 1 : 0;
}

__END__