#!/usr/bin/perl
# $Id: focus-news.pl,v 1.1 2005/12/06 15:41:34 root Exp root $
# #############################################################################
use strict;
use Data::Dumper;
use LWP::Simple;
use HTML::TokeParser::Simple;
# Category name => listing page on focus-news.net.
# NOTE(review): the keys "Iconomics" and "Finace" look like typos for
# "Economics" / "Finance"; only the "World" entry is used below.
my $urls = {
    Institutions => 'http://www.focus-news.net/index.php?catid=68&ch=11',
    Politics     => 'http://focus-news.net/index.php?catid=80&ch=5',
    Iconomics    => 'http://focus-news.net/index.php?catid=86&ch=10',
    Finace       => 'http://focus-news.net/index.php?catid=97&ch=6',
    Society      => 'http://focus-news.net/index.php?catid=104&ch=8',
    Security     => 'http://focus-news.net/index.php?catid=110&ch=9',
    Police       => 'http://www.focus-news.net/index.php?catid=120&ch=0',
    Health       => 'http://focus-news.net/index.php?catid=121&ch=0',
    World        => 'http://focus-news.net/index.php?catid=134&ch=6',
};

# Scrape the "World" listing and dump the resulting structure to STDOUT.
print Dumper(feed($urls->{World}));
###############################################################################
# feed($url)
#
# Downloads the listing page at $url and extracts its news items.
#
# The HTML token stream is walked once while tracking this state:
#   * the text token following a start tag accepted by is_date() becomes
#     the current date;
#   * a start tag accepted by is_news_link() supplies the pending href
#     (with any "&PHPSESSID=..." parameter removed);
#   * a text token seen while an href is pending and a date is known
#     becomes the (whitespace-trimmed) title;
#   * once both title and href are set, the item is emitted on the next
#     token that is none of the above, or at end of input.
#
# Returns an array reference of hash references with the keys "title",
# "href" (prefixed with "http://www.focus-news.net/") and "date".
# When no URL is given or the page cannot be fetched, a warning is issued
# and a reference to an empty array is returned.
sub feed
{
    my $url   = shift;
    my $items = [];

    unless (defined $url && length $url) {
        warn "feed: no URL given\n";
        return $items;
    }

    # LWP::Simple::get() yields undef when the request fails; do not hand
    # that to the parser.
    my $content = get($url);
    unless (defined $content) {
        warn "feed: could not fetch $url\n";
        return $items;
    }

    my $parser  = HTML::TokeParser::Simple->new(\$content);
    my $href    = '';
    my $title   = '';
    my $date    = '';
    my $in_date = 0;    # true between a date <span> and its text

    # Store the pending title/href pair and clear it for the next item.
    # $date is deliberately kept: it stays current until the next date span.
    my $emit = sub {
        push @{$items},
          {
            title => $title,
            href  => "http://www.focus-news.net/$href",
            date  => $date,
          };
        $title = '';
        $href  = '';
    };

    while (my $token = $parser->get_token) {
        next if $token->is_comment;

        if (is_date($token)) {
            $in_date = 1;
            next;
        }

        if ($in_date && $token->is_text) {
            $date    = $token->as_is;
            $in_date = 0;
            next;
        }

        if (is_news_link($token)) {
            $href = $token->get_attr('href');

            # Drop the session id but keep the separator of whatever
            # parameter follows it; a dangling "&" at the very end of the
            # string is removed as well.
            $href =~ s/&PHPSESSID=[^&]+(?:&\z)?//i if defined $href;
            next;
        }

        if ($href && $token->is_text && $date) {
            $title = $token->as_is;
            $title =~ s/^\s+//;
            $title =~ s/\s+$//;
            next;
        }

        $emit->() if $title && $href;
    }

    # The stream may end right after a title text; do not lose that item.
    $emit->() if $title && $href;

    return $items;
}
# is_news_link($token)
#
# Returns 1 when $token is an <a> start tag whose class attribute is
# exactly "ListCatsNews" and which has true onMouseOut and onMouseOver
# attributes; returns 0 otherwise.
#
# The tag type is tested first so that attributes are never queried on
# tokens that are not <a> start tags (text, end tags, ...), mirroring the
# order used by is_date().
sub is_news_link
{
    my $token = shift;

    return 0 unless $token->is_start_tag('a');

    # An <a> without a class attribute yields undef here.
    my $class = $token->get_attr('class');
    return 0 unless defined $class && $class eq 'ListCatsNews';

    return ($token->get_attr('onMouseOut') && $token->get_attr('onMouseOver'))
      ? 1
      : 0;
}
# is_date($token)
#
# Returns 1 when $token is a <span> start tag whose class attribute equals
# "ListCatsDateSource", and 0 for every other token.
sub is_date
{
    my $candidate = shift;

    # Anything that is not a <span> start tag is rejected up front.
    return 0 unless $candidate->is_start_tag('span');

    return $candidate->get_attr('class') eq 'ListCatsDateSource' ? 1 : 0;
}
__END__