#!/usr/bin/perl
# salsa - a mass downloader of Airport Facilities Directory information

# Release identification; all three values are interpolated into the
# built-in help text.
$salsa_version = "0.0.0.-1";
$salsa_date= "December 2, 2006";
$salsa_developer="Frank Stutzman (salsa-dev\@stutzman.com)";

# set up some constants

# Determine the type of OS we are on and set the path accordingly.
# My one concession to the gods of portability.
#
# $^O is "cygwin" under Cygwin; the pattern used to be spelled "cgywin",
# which could never match, so Cygwin fell through to /usr/local/bin/.

$path = $^O =~ /linux|cygwin/i ? "/usr/bin/" : "/usr/local/bin/";

# Command prefixes.  $wget_file expects the output file name and then the
# URL to be appended; $wget_pipe writes the download to stdout ("-").
$wget_file = $path . "wget -q -O ";
$wget_pipe = $wget_file ."- ";

# Ghostscript prefix: the output file name is appended directly after
# "-sOutputFile=", followed by the input PDF files.
$ghost_script = $path . "gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -sOutputFile=";

$pages_only=0;         #flag the default of smooshing all the pages together

# Region abbreviations accepted on the command line.
@regions=("pac","al","nw","sw","nc","sc","ec","se","ne");
# Month abbreviations, indexed by (month number - 1).
@months=("JAN","FEB","MAR","APR","MAY","JUN","JUL","AUG","SEP","OCT","NOV","DEC");

# A bunch of urls that we use
$main_naco_url="http://www.naco.faa.gov/";
$naco_afd_url=$main_naco_url . "pdfs/";
$naco_afd_index_url=$main_naco_url . "index.asp?xml=naco/online/d_afd";

# parse the options
#
# Each argument is lower-cased and then classified as one of:
#   * a help request (-h, --h, -help, --help): print the manual and exit;
#   * --pages-only: set $pages_only;
#   * a region abbreviation listed in @regions: append it to @get_regions.
# Anything else is reported on STDERR and skipped.
foreach my $arg (map { lc } @ARGV) {
   if ($arg =~ /^--?h(?:elp)?$/) {

print <<XDOC; 
NAME
	salsa - the mass downloader of airport/facility information from
	$main_naco_url

SYNOPSIS
	salsa [OPTION] [region_abbr]...

VERSION
	$salsa_version    $salsa_date
	
DESCRIPTION
	The airport/facility directory (A/FD) on the $main_naco_url 
        web site is structured such that each PDF document is a single
	page of the paper copy of directory.  Salsa downloads all the
	directory pages for a given directory and concatinates them 
	into one large PDF file.

ARGUEMENTS
	The arguments to salsa is the list of regions abbreviations for 
	the various regions each directory represents.  Valid identifiers 
	are: @regions.

	These identifiers are case in-sensitive, and multipule identifers 
	can be specifed for each invocation.

	Calling salsa with out any arguements causes it to print out
	the current effective dates and then exits.

OPTIONS 
	--pages-only
	      Do not concatinate the individual pages into one large
  	      PDF file.  Salsa will generate pages which have file names
	      in the form:
	      <region_id>_<page_number>_effective_date.PDF

EXAMPLES
	Download all the A/FD data for the Northwest region.
		salsa nw
	
	Download all the A/FD data for the North West, South West
	and North East regions.
		salsa nw sw ne
	Download all the A/FD data for the Northwest region, but leave
	them as individual pages rather than the default of concatinating 
	them into one big file
		salsa --pages-only nw


BUGS
	The concatination of the individual pages is a little dumb.
	If there are any stray PDF files lingering in the current
	working directory that have file names in a simular format as 
	the as the individual pages, then those PDF files will be added
	to the final PDF file.

	The --pages-only option is of dubious value at the moment as 
	the user has no way of knowing what airports are on what pages.
	The thought is that in a future version will generate html index
	files which will provide this information.

REQUIREMENTS
	wget (1)  - The non-interactive network downloader
	gs   (1)  - The GhostScript Postscript/PDF manipulator

PLATFORMS
	salsa was developed on several different Linux distributions
	and one fairly generic FreeBSD system.  It should work on
	most any unix-eque system that has a vaguely current version
	of perl.  As it relies heavily on reading pipes to wget, it 
	probably doesn't have whisper of a chance of working under 
	windows without a big rewrite.  There is a report of it
        working under cgywin under windows (Thanks, Greg!)

ANSWERS TO QUESTIONS SOMEBODY IS GOING TO ASK
	Because salsa goes with taco, the naco IAP downloader. (and
	guacamole is too hard to type)

AUTHOR
	$salsa_developer

REPORTING BUGS
	If it makes you feel better, you can send your bug reports 
	to the above email address.  If you are desparate to have 
	bugs fixed though, I suggest buying a book on perl and doing
	it yourself.  I already have a life.

COPYRIGHT/WARNINGS
	Copyright, Copyleft, copy it however you like.
	
	This  is  free software; There is NO warranty; not even for 
	MERCHANTABILITY or  FITNESS  FOR  A  PARTICULAR
	PURPOSE.

	And whatever you do don't be a dolt and trust your life
	to the bad programming of some guy on the internet that
	never has gotten rid of the flashing 12:00 on his VCR.
FUTURE
        It very possible that the salsa may have a very short
	life, with its fuctionality being incorporated into 
	taco.
	
SEE ALSO
	taco, but that really doesn't have any information other 
	than its build-in help file.  

XDOC
	exit;
   }

   if ($arg =~ /^--pages-only$/) {
       $pages_only = 1;
       next;
   }

   # Membership needs a real comparison: "grep($arg, @regions)" merely
   # evaluates $arg for truth once per element, so it accepted any
   # non-empty argument as a region.
   if (grep { $_ eq $arg } @regions) {
       push(@get_regions, $arg);
       next;
   }

   warn "salsa: ignoring unrecognised argument '$arg'\n";
}

# Lets go to the main NACO site and parse out the effective period of the
# current AFD.
#
# Reads the index page through a pipe from wget, prints the effective
# period of the first cycle link found, and sets $eff_date to the
# "_<DD><MON><YYYY>" suffix that is used in the page file names.
# Dies if wget cannot be started or no cycle link is found.

# Run wget without a shell: the index URL contains "?" and "=", which a
# shell would be free to treat as a wildcard pattern.
my @index_cmd = (split(' ', $wget_pipe), $naco_afd_index_url);

open(my $site, "-|", @index_cmd)
  or die "unable to open pipe to $wget_pipe to get $naco_afd_index_url:$!\n";

while (my $line = <$site>) {
    if ($line =~ /<a href=\"\/afd\.asp\?cycle=afd_.*\&amp;eff=(\d\d)-(\d\d)-(\d{4})\&amp\;end=(\d\d)-(\d\d)-(\d{4})/) {
        # Copy the captures straight away; the dates are MM-DD-YYYY.
        my ($from_mon, $from_day, $from_year, $to_mon, $to_day, $to_year)
            = ($1, $2, $3, $4, $5, $6);
        print "The effective date of this cycle is from $months[$from_mon-1] $from_day, $from_year to $months[$to_mon-1] $to_day, $to_year\n";
        $eff_date = "_" . $from_day . $months[$from_mon-1] . $from_year;
        last;
    }
}

# The close status is not checked: after the early "last" wget may well
# be terminated while still writing.
close($site);

# Without the date suffix every page URL built later would be wrong.
die "unable to determine the current A/FD effective date from $naco_afd_index_url\n"
    unless defined($eff_date);

# the user gave us no valid regions so we will leave at this point.
# Test the array itself: defined(@array) was deprecated for years and is a
# compile-time fatal error from perl 5.22 onwards.
exit unless @get_regions;

# For every requested region: fetch page 1, 2, 3, ... until wget reports a
# failure, then (unless --pages-only was given) merge the pages fetched in
# this run into <region>_afd.PDF and delete the individual page files.
foreach $region_name (@get_regions) {
    print "Getting $region_name region:";

    # Break the command prefixes into words so both programs can be run
    # with the list form of system(); no shell ever sees the file names.
    my @wget_cmd = split(' ', $wget_file);
    my @gs_cmd   = split(' ', $ghost_script);

    # An unset $eff_date contributes nothing to the file names.
    my $date_tag = defined($eff_date) ? $eff_date : "";

    my @page_files;        # pages successfully fetched, in page order
    $page_count=0;
    $failed=0;
    while(! $failed) {
        $page_count++;
        # have to do these in and out things because I want to make sure
        # the files stay in the right order when glombed together: the
        # remote name uses the bare page number, the local copy is
        # zero-padded to four digits.
        $file_name_in  = $region_name . "_" . $page_count . $date_tag . ".PDF";
        $file_name_out = $region_name . "_" . sprintf("%04d",$page_count) . $date_tag . ".PDF";
        $failed =
            system(@wget_cmd, $file_name_out, $naco_afd_url . $file_name_in);
#       $failed = 1 if ($page_count >5);
        push(@page_files, $file_name_out) if (! $failed);
        print ".";
    }
    print "\n";

    # The last attempt failed; remove whatever wget left under that name.
    unlink($file_name_out);

    if (! $pages_only) {
        if (! @page_files) {
            warn "No pages were downloaded for the $region_name region, nothing to merge\n";
            next;
        }

        print "Generating AFD for the $region_name region\n\n";

        # The output name goes directly after the trailing "-sOutputFile=".
        $gs_cmd[-1] .= $region_name . "_afd.PDF";

        # Hand gs exactly the pages fetched above instead of the old
        # "$region_name*.PDF" wildcard, which also matched leftover PDFs
        # and the merged file from an earlier run.
        # NOTE: the BUGS section of the help text still describes the
        # wildcard behaviour.
        if (system(@gs_cmd, @page_files) == 0) {
            unlink(@page_files);
        }
        else {
            # Keep the pages so a failed merge does not lose the download.
            warn "gs failed for the $region_name region, leaving the individual pages in place\n";
        }
    }
}
