#!/usr/bin/perl
use strict;
use LWP::UserAgent;
# Entry point. The first command-line argument names a text file holding one
# country per line; the city list of every country is fetched and stored in
# turn by get_cities().
die "error: no country defined...\n" unless defined $ARGV[0];

# Read the list directly instead of shelling out to cat(1): a file name that
# contains shell metacharacters can then never run a command, and a missing
# or unreadable list is reported instead of being treated as an empty one.
open my $list_fh, '<', $ARGV[0]
    or die "error: cannot read country list '$ARGV[0]': $!\n";
chomp(my @countries = <$list_fh>);
close $list_fh;

for my $country (@countries) {
    next unless $country =~ /\S/;    # a blank line names no country
    print " Requesting info from yahoo.com $country...\n";
    # Interpolating creates a copy, so the entry in @countries can never be
    # altered through the callee's @_ alias.
    get_cities("$country");
    print "-" x 80, "\n";
}
# ---------------------------------------------------
#chomp(my @states = `cat states`);
#for ( @states ){
# my($a,$b)=split '=', $_;
# $a =~ s/\s+/_/g;
# print "-" x 80, "\n$a\[$b\]\n";
# get_cities($a);
#}
# ---------------------------------------------------
# Fetch the Yahoo directory page listing the cities of one country, extract
# the city names and store them.
#
#   $_[0] - country name as given in the input list. For the URL every '-'
#           becomes " and ", whitespace runs become '_' and one trailing '/'
#           is dropped. The caller's value is never modified.
#
# Progress is printed to STDOUT. When at least one city is found, the list is
# passed to write_in_file() with the target "<country>/cities", where
# <country> is the name as given (minus a trailing '/'). Returns nothing.
sub get_cities
{
    my ($country) = @_;

    # Work on copies: elements of @_ alias the caller's variables, and a
    # literal argument would make an in-place substitution die.
    (my $dir_name = $country) =~ s/\/$//;

    (my $url_name = $country) =~ s/\-/ and /g;
    $url_name =~ s/\s+/_/g;
    $url_name =~ s/\/$//;

    my $url = "http://dir.yahoo.com/Regional/Countries/$url_name/Cities/";
    print " Fetching page: $url\n";

    # Drop undefined entries so a failed fetch is treated as "no lines".
    my @html = grep { defined $_ } get_($url);

    print " Start parsing data ...\n";
    if ( join('', @html) =~ /Yahoo! - Document Not Found/i ) {
        print " *** ERROR: Yahoo have no data...return\n";
        return;
    }
    return unless @html;

    my @cities;
    for my $line (@html) {
        # Only list entries that start with a link are scanned for names.
        next unless $line =~ /^\s*<li><a href=/i;
        while ( $line =~ /<b>(.*?)<\/b>/ig ) {
            my $city = $1;
            $city =~ s/\(\d+\)$//;    # strip a trailing "(123)"
            $city =~ s/\@//;          # strip the first '@'
            push @cities, $city;
        }
    }

    print " Store [", scalar @cities, "] cities for $url_name\n";

    # Store only if any data was found. The directory is the name as it was
    # given; reversing the URL mangling would be lossy for names that
    # contain '_', " and " or more than one '-'.
    write_in_file("$dir_name/cities", \@cities) if @cities;

    return;
}
# -----------------------------------------------------------------------------
# Write a list of strings to a file, joined by newlines (no trailing newline).
#
#   $_[0] - path of the target file, relative to the current directory
#   $_[1] - reference to the array of strings to write
#
# If the target cannot be opened and $ARGV[1] is defined, the file named by
# $ARGV[1] (also relative to the current directory) is tried instead.
# Dies when neither file can be opened, or when writing or closing fails.
# Returns 1 on success.
sub write_in_file # (file, arr_ref)
{
    my ($file, $lines) = @_;
    my $fallback = $ARGV[1];
    my $fh;

    # Three-argument open takes the name literally; the two-argument form
    # strips leading/trailing whitespace from it.
    if ( open $fh, '>', "./$file" ) {
        print " Start writing data to file [./$file]...";
    }
    elsif ( defined $fallback and open $fh, '>', "./$fallback" ) {
        print "error [./$file] trying next one...\n";
        print " Start writing data to [./$fallback] next one...";
    }
    else {
        die " *** Error opening file $file: $!";
    }

    print {$fh} join("\n", @{$lines})
        or die " *** Error writing data for $file: $!";
    # Buffered write errors only surface when the handle is closed.
    close $fh
        or die " *** Error closing file for $file: $!";

    print " [Success]\n";
    return 1;
}
# -----------------------------------------------------------------------------
# Fetch a URL with an HTTP GET and return the response body as lines.
#
#   $_[0] - URL to request
#
# Returns the body split on "\n" when the response is successful. On any
# failure a warning with the response status line is issued and an empty
# list is returned.
sub get_
{
    my ($url) = @_;

    my $ua = LWP::UserAgent->new;
    $ua->agent("WebPageIndex/1.0");

    my $res = $ua->request( HTTP::Request->new(GET => $url) );

    # is_success returns a defined false value on failure, so it must be
    # tested for truth; wrapping it in defined() accepts every response.
    unless ( $res->is_success ) {
        warn " *** ERROR: GET $url failed: ", $res->status_line, "\n";
        return;    # bare return: empty list in list context
    }

    return split("\n", $res->content);
}