#!/usr/bin/perl

use strict;
use LWP::UserAgent;

# Entry point: the first command-line argument names a file that lists one
# country per line; the city list of every country is fetched and stored.
die "error: no country defined...\n" unless defined $ARGV[0];

# Read the list directly instead of shelling out to `cat`, so that a file
# name containing spaces or shell metacharacters is neither split nor
# executed, and an unreadable file is reported instead of being treated as
# an empty list.
open my $list_fh, '<', $ARGV[0]
    or die "error: cannot read country list '$ARGV[0]': $!\n";
chomp(my @countries = <$list_fh>);
close $list_fh;

for my $country (@countries) {
    # A blank line would otherwise request an empty country name.
    next unless $country =~ /\S/;

    print " Requesting info from yahoo.com $country...\n";
    # Pass a copy so the callee cannot alter the list element through the
    # aliasing of @_.
    get_cities("$country");
    print "-" x 80, "\n";
}

# ---------------------------------------------------
#chomp(my @states = `cat states`);
#for ( @states ){
#    my($a,$b)=split '=', $_;
#    $a =~ s/\s+/_/g;
#    print "-" x 80, "\n$a\[$b\]\n";
#    get_cities($a); 
#}
# ---------------------------------------------------
# Fetch the Yahoo directory page that lists the cities of one country,
# extract the city names and store them in "<country>/cities".
#
# Arguments:
#   $_[0] - country name; "-" is turned into " and ", runs of whitespace
#           into "_" and a trailing "/" is dropped to build the URL.
#
# Returns nothing.  Progress and error messages are printed to STDOUT.
sub get_cities
{
    # Work on a copy: the elements of @_ alias the caller's variables, and
    # editing them in place dies when the caller passes a literal string.
    my $country = $_[0];
    $country =~ s/\-/ and /g;
    $country =~ s/\s+/_/g;
    $country =~ s/\/$//;

    my $url = "http://dir.yahoo.com/Regional/Countries/$country/Cities/";
    print " Fetching page: $url\n";

    # A failed request may come back as an undef placeholder; drop it so
    # that a failure is seen as an empty page rather than one line of HTML.
    my @html = grep { defined } get_($url);
    print " Start parsing data ...\n";

    if ( !@html ) {
        print " *** ERROR: no page received...return\n";
        return;
    }
    if ( join('', @html) =~ /Yahoo! - Document Not Found/i ) {
        print " *** ERROR: Yahoo have no data...return\n";
        return;
    }

    # City names are the bold texts on the list-item lines of the page.
    my @cities;
    for my $line (@html) {
        next unless $line =~ /^\s*<li><a href=/i;
        while ( $line =~ /<b>(.*?)<\/b>/ig ) {
            my $city = $1;
            $city =~ s/\(\d+\)$//;    # trailing "(123)" counter
            $city =~ s/\@//;          # first "@" marker
            push @cities, $city;
        }
    }
    print " Store [", scalar @cities, "] cities for $country\n";

    # Store if any data...
    return unless @cities;

    # Undo the URL mapping to get the directory name.  The " and " -> "-"
    # step is global to mirror the global "-" -> " and " step above;
    # otherwise only the first hyphen of a multi-hyphen name is restored.
    my $dir = $country;
    $dir =~ s/_/ /g;
    $dir =~ s/ and /-/g;
    write_in_file("$dir/cities", \@cities);

    return;
}
# -----------------------------------------------------------------------------
# Write a list of lines to a file below the current directory.
#
# Arguments:
#   $_[0] - path of the target file, relative to "."
#   $_[1] - reference to the array of lines to store
#
# If the target cannot be opened and a second command-line argument was
# given, "./$ARGV[1]" is used as the fallback target.  Dies when no target
# can be opened or when writing/closing fails.  The lines are joined with
# "\n"; no newline is appended after the last one.
sub write_in_file # (file, arr_ref)
{
    my ($file, $lines) = @_;
    my $out;

    # Three-argument open with a lexical handle: the file name can never be
    # mistaken for an open mode and the handle does not leak as a global.
    if ( open $out, '>', "./$file" ) {
        print " Start writing data to file [./$file]...";
    }
    else {
        # Remember why the primary target failed; a later open would
        # overwrite $! and the final message would report the wrong reason.
        my $first_error = $!;

        if ( defined $ARGV[1] && open( $out, '>', "./$ARGV[1]" ) ) {
            print "error [./$file] trying next one...\n";
            print " Start writing data to [./$ARGV[1]] next one...";
        }
        else {
            die " *** Error opening file $file: $first_error";
        }
    }

    print {$out} join( "\n", @{$lines} )
        or die " *** Error writing file $file: $!";
    # Buffered write errors only surface when the handle is closed.
    close $out
        or die " *** Error closing file $file: $!";
    print " [Success]\n";
}
# -----------------------------------------------------------------------------
# Fetch a URL with an HTTP GET request and return the response body as a
# list of lines.
#
# Arguments:
#   $_[0] - URL to request
#
# Returns the response content split on "\n" when the request succeeds.
# On failure a warning with the HTTP status line is issued and an empty
# list (undef in scalar context) is returned.
sub get_ 
{
    my ($url) = @_;

    my $ua = LWP::UserAgent->new;
    $ua->agent("WebPageIndex/1.0");
    my $res = $ua->request( HTTP::Request->new( GET => $url ) );

    # is_success is a boolean that is always defined, so its truth -- not
    # its definedness -- tells whether the request worked.
    unless ( $res->is_success ) {
        warn " *** ERROR: request for $url failed: ", $res->status_line, "\n";
        # Bare return: empty list in list context instead of a one-element
        # list holding undef.
        return;
    }

    return split /\n/, $res->content;
}