#!/usr/bin/perl
# Copyright (c) 2000 Flavio Glock <fglock@pucrs.br>. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
# 
# If you use it/like it, send a postcard to the author. 
# Find the latest version in CPAN

use Cwd        qw(abs_path);
use Getopt::Long;
# use LWP::UserAgent;
# use HTTP::Cookies;
use URI::URL;
use URI::Heuristic    qw(uf_uristr);
use URI::Escape     qw(uri_escape);
use LWP::MediaTypes    qw(media_suffix);
# use HTTP::Status;
use HTTP::Daemon;
# use IO::Socket qw(getpeername);
use CGI qw/escape unescape/;
#if ($^O =~ /win32/i) { 
#    eval { require Win32::Internet; };
#}
use strict;

    #!/usr/bin/perl
    # Copyright (c) 2000 Flavio Glock <fglock@pucrs.br>. All rights reserved.
    # This program is free software; you can redistribute it and/or
    # modify it under the same terms as Perl itself.
    # 
    # If you use it/like it, send a postcard to the author. 
    # Find the latest version in CPAN

    use LWP::UserAgent;
    if ($^O =~ /win32/i) { 
        eval { require Win32::Internet; };
    }
    use strict;

    # ftp.pm -- modified from Gisle Aas' "LWP::Protocol::ftp"
    # by Flavio S. Glock
    #
    # oct-13-2000: Modified to include "REST" support

    # $Id: ftp.pm,v 1.27 1999/11/04 20:25:51 gisle Exp $
    # Implementation of the ftp protocol (RFC 959). We let the Net::FTP
    # package do all the dirty work.

    # Embedded FTP protocol handler, adapted from LWP::Protocol::ftp.
    # Net::FTP does the real protocol work; this wrapper adds REST
    # (resume) support and maps results onto HTTP::Response objects.
    package ftp;

    use Carp ();

    use HTTP::Status ();
    use HTTP::Negotiate ();
    use HTTP::Response ();
    use LWP::MediaTypes ();
    use File::Listing ();
    use Net::Cmd qw(CMD_MORE);

    require LWP::Protocol;
    our @ISA = qw(LWP::Protocol);

    use strict;
    # Net::FTP is loaded eagerly but failure is only reported later: the
    # error text is remembered and returned as a 500 response from
    # request() instead of aborting the whole script at compile time.
    eval {
        require Net::FTP;
        Net::FTP->require_version(2.00);
    };
    my $init_failed = $@;

    # Set to 1 for verbose protocol tracing on STDOUT.
    my $DEBUG = 0;


    # Perform an FTP retrieval described by an HTTP::Request and return an
    # HTTP::Response.  Body data is streamed through the $arg callback as
    # it arrives.  Handles GET and HEAD, HTTP-style resume via the FTP
    # REST command, If-Modified-Since, and directory listings (either raw
    # "text/ftp-dir-listing" or converted to HTML, chosen by negotiation).
    sub request
    {
        # arg is the receive-data callback subroutine
        my($request, $timeout, $arg) = @_;

        print "  [ ftp::request BEGIN ]\n" if $DEBUG;
        # Report the remembered Net::FTP load failure, if any.
        if ($init_failed) {
            print "  [ ftp::request DONE 2 ]\n" if $DEBUG;
            return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, $init_failed);
        }

        my $size = 65536;          # read-chunk size handed to $data->read
        my $method   = 'GET';      # only GET/HEAD are supported below

        my $url = $request->url;
        my $scheme   = $url->scheme;
        my $host     = $url->host;
        my $port     = $url->port;
        my $user     = $url->user;
        my $password = $url->password;

        # If a basic authorization header is present then we prefer it over
        # the username/password specified in the URL.
        my($u,$p) = $request->authorization_basic;
        if (defined $u) {
            $user = $u;
            $password = $p;
        }

        # We allow the account to be specified in the "Account" header
        my $acct     = $request->header('Account');

        # try to make a connection
        my $ftp = Net::FTP->new($host, Port => $port);
        unless ($ftp) {
            $@ =~ s/^Net::FTP: //;
            print "  [ ftp::request DONE 3 ]\n" if $DEBUG;
            return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, $@);
        }

        # Create an initial response object
        my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "Document follows");
        $response->request($request);

        my $mess = $ftp->message;  # welcome message
        $mess =~ s|\n.*||s; # only first line left
        $mess =~ s|\s*ready\.?$||;
        # Make the version number more HTTP like
        $mess =~ s|\s*\(Version\s*|/| and $mess =~ s|\)$||;
        $response->header("Server", $mess);

        $ftp->timeout($timeout) if $timeout;

        print "  [ ftp::request Logging in as $user (password $password)... ]\n" if $DEBUG;
        unless ($ftp->login($user, $password, $acct)) {
            # Unauthorized.  Let's fake a RC_UNAUTHORIZED response
            my $res =  HTTP::Response->new(&HTTP::Status::RC_UNAUTHORIZED, scalar($ftp->message));
            $res->header("WWW-Authenticate", qq(Basic Realm="FTP login"));
            print "  [ ftp::request DONE 4 ]\n" if $DEBUG;
            return $res;
        }

        # Get & fix the path: last segment is the file (may be empty,
        # meaning a directory listing is wanted), the rest are cwd steps.
        my @path =  grep { length } $url->path_segments;
        my $remote_file = pop(@path);
        $remote_file = '' unless defined $remote_file;

        $ftp->binary;

        for (@path) {
            unless ($ftp->cwd($_)) {
                print "  [ ftp::request DONE 5 ]\n" if $DEBUG;
                return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND, "Can't chdir to $_");
            }
        }

        unless ($method eq 'GET' || $method eq 'HEAD') {
            print "  [ ftp::request DONE 6 ]\n" if $DEBUG;
            return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
                       "Illegal method $method");
        }

        # Honour If-Modified-Since using the server's MDTM timestamp.
        if (my $mod_time = $ftp->mdtm($remote_file)) {
                $response->last_modified($mod_time);
                if (my $ims = $request->if_modified_since) {
                    if ($mod_time <= $ims) {
                        $response->code(&HTTP::Status::RC_NOT_MODIFIED);
                        $response->message("Not modified");
                        print "  [ ftp::request DONE 7 ]\n" if $DEBUG;
                        return $response;
                    }
                }
        }

        my $data;  # the ftp data handle
        my $content;

        # Resume: parse the first-byte offset out of a header like
        # Range: bytes=9500-
        my $range = $request->header("Range");    # request
        my ($content_begin) = $range =~ /bytes\s?\=\s?(\d+)\s?\-/;

        print "  [ ftp::request remote_file: $remote_file ", length($remote_file), "]\n" if $DEBUG;
        print "  [ ftp::request header: ", $request->as_string, " ]\n" if $DEBUG;
        print "  [ ftp::request range: $range => $content_begin ]\n" if $DEBUG;

        # print "  [ ftp::response header: ", $response->as_string, " ]\n" if $DEBUG;
        # my $content_range = $request->header("Content-Range"); # response

        # Ask the server to restart the transfer at $content_begin.
        # NOTE(review): _REST is Net::FTP's underscore-prefixed (private)
        # restart primitive; presumably it issues "REST <offset>" -- the
        # commented quot() line above it did the same thing explicitly.
        # my $ok = $ftp->quot("REST $content_begin");
        my $rest_ok = 0;
        unless ($ftp->_REST($content_begin)) {
            print "  [ ftp::request rest: error ]\n" if $DEBUG;
            $ftp->_REST(0);    # cancel last _REST
            $content_begin = 0;
        }
        else {
            $rest_ok = 1;
            print "  [ ftp::request rest: ok ]\n" if $DEBUG;
        }

        # Plain file requested and RETR accepted: stream it to the callback.
        if (length($remote_file) and $data = $ftp->retr($remote_file)) {
            print "  [ ftp::request remote_file: $remote_file ]\n" if $DEBUG;
            my($type, @enc) = LWP::MediaTypes::guess_media_type($remote_file);
            $response->header('Content-Type',   $type) if $type;
            for (@enc) {
                $response->push_header('Content-Encoding', $_);
            }
            # RETR's reply often contains "(NNN bytes)" -- use it as length.
            my $mess = $ftp->message;
            my $content_length = 0;
            print "  [ ftp::request mess: $mess $type ]\n" if $DEBUG;
            if ($mess =~ /\((\d+)\s+bytes\)/) {
                $content_length = $1;
                $response->header('Content-Length', $content_length);
            }

            if ($method ne 'HEAD') {

                # Read data from server into callback
                do {
                    my ($size_read, $data_end);
                    eval { $size_read = $data->read($content, $size); };
                    print "  [ ftp::request Data: $size_read ]\n" if $DEBUG;
                    # print "  [ ftp::request Content: ",length($content)," ]\n" if $DEBUG;
                    # print "  [ ftp::request Size: $size ]\n" if $DEBUG;

                    if (! $size_read) {
                        # possibly a timeout
                        $@ = 'No data';
                        print "  [ ftp::request No data ]\n" if $DEBUG;
                        $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR);
                        $response->header('X-Died' => $@);
                        $response->message("FTP close response: " . $ftp->code . " " . $ftp->message);
                        $data->close;
                        return $response;
                    }

                    # When resume was accepted, report each chunk's span
                    # as an HTTP 206 Content-Range.
                    if ($rest_ok and ! $@) {
                        $data_end = $content_begin + $size_read;
                        $response->header("Content-Range", "bytes ${content_begin}-${data_end}/$content_length");
                        $response->code(&HTTP::Status::RC_PARTIAL_CONTENT);
                        print "  [ ftp::request Content-Range: ", $response->header("Content-Range"), " ]\n" if $DEBUG;
                        $content_begin = $data_end;
                    }

                    eval { &$arg($content, $response, undef ); } unless $@;
                    if ($@) {
                        chomp($@);
                        $response->header('X-Died' => $@);
                        last;
                    }
                } while $content;
            }    # if ne HEAD

            unless ($data->close) {
                # Something did not work too well
                if ($method ne 'HEAD') {
                            $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR);
                            $response->message("FTP close response: " . $ftp->code .
                               " " . $ftp->message);
                }
            }
        } elsif (!length($remote_file) || $ftp->code == 550) {
                print "  [ ftp::request remote_file: (none) ]\n" if $DEBUG;
                # 550 not a plain file, try to list instead
                if (length($remote_file) && !$ftp->cwd($remote_file)) {
                        print "  [ chdir before listing failed ]\n" if $DEBUG;
                        print "  [ ftp::request DONE 8 ]\n" if $DEBUG;
                        return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND,
                           "File '$remote_file' not found");
                }

                # It should now be safe to try to list the directory
                my @lsl = $ftp->dir;

                # Try to figure out if the user want us to convert the
                # directory listing to HTML.
                my @variants = (
                       ['html',  0.60, 'text/html'            ],
                       ['dir',   1.00, 'text/ftp-dir-listing' ]
                );
                #$HTTP::Negotiate::DEBUG=1;
                my $prefer = HTTP::Negotiate::choose(\@variants, $request);

                my $content = '';

                if (!defined($prefer)) {
                    print "  [ ftp::request DONE 9 ]\n" if $DEBUG;
                    return HTTP::Response->new(&HTTP::Status::RC_NOT_ACCEPTABLE,
                       "Neither HTML nor directory listing wanted");
                } elsif ($prefer eq 'html') {
                    # Build a simple HTML page from the parsed listing.
                    $response->header('Content-Type' => 'text/html');
                    $response->header('Content-Location' => "$url/") unless $url =~ /\/$/;
                    $content = "<HEAD><TITLE>File Listing</TITLE>\n";
                    $content .= "</HEAD>\n<BODY>\n";
                    $content .= "<H1>Directory listing of $url</H1>\n";
                    $content .= "<PRE><A HREF=\"..\">Up to higher level directory</A>\n";
                    # my $base = $request->url->clone;
                    # my $path = $base->epath;
                    # $base->epath("$path/") unless $path =~ m|/$|;
                    # $content .= qq(<BASE HREF="$base">\n</HEAD>\n);

                    for (File::Listing::parse_dir(\@lsl, 'GMT')) {
                            my($name, $type, $size, $mtime, $mode) = @$_;
                            $content .= qq(  <LI> <a href="$name">$name</a>);

                            # $type, $size, $mtime, $mode);

                            $content .= " $size bytes" if $type eq 'f';
                            $content .= " =&gt; $1" if $type =~ /l\s*(.*)/;

                            $content .= "\n";    # \n or PRE
                    }
                    $content .= "</PRE></BODY>\n";
                } else {
                    $response->header('Content-Type', 'text/ftp-dir-listing');
                    $content = join("\n", @lsl, '');
                }

                $response->header('Content-Length', length($content));

                if ($method ne 'HEAD') {
                    # $response = LWP::Protocol::collect_once($arg, $response, $content);
                    eval { &$arg($content, $response, undef ); };    # send content
                    eval { &$arg(undef, $response, undef ); };    # finish
                }
        } else {
            # Neither a retrievable file nor a listable directory: surface
            # the raw FTP status to the caller.
            print "  [ ftp::request Returning message instead of file. data=[$data] ]\n" if $DEBUG;
            my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, "FTP return code " . $ftp->code);
            $res->content_type("text/plain");
            $res->content($ftp->message);
            print "  [ ftp::request DONE 10 ]\n" if $DEBUG;
            return $res;
        }

        print "  [ ftp::request DONE 1 ]\n" if $DEBUG;
        return $response;
    }

    # "pod" removed. See LWP::Protocol::ftp
    #!/usr/bin/perl
    # Copyright (c) 2000 Flavio Glock <fglock@pucrs.br>. All rights reserved.
    # This program is free software; you can redistribute it and/or
    # modify it under the same terms as Perl itself.
    # 
    # If you use it/like it, send a postcard to the author. 
    # Find the latest version in CPAN

    use strict;
    # Low-level file utilities shared by the downloader: copy/rename/
    # unlink helpers plus the per-directory "translation file" that maps
    # long remote names to short local ones and stores file attributes.
    package glynx_file;

    our $VERSION = "1.031";

    # Output switches, set by the main program.
    our $verbose;
    our $quiet;

    # File-name conventions used throughout the download cache.
    our $NAME_TRANSLATION_FILE = "_NAMES_.HTM";    # per-directory name/attribute map
    our $TMP_SUFFIX =        "._TMP_";             # temporary parking name
    our $INDEXFILE =        "_INDEX_.HTM";         # file displaced by a directory
    our $PART_SUFFIX =        "._PART_";           # partial-download suffix


    sub my_link {
        # Make $dest a duplicate of $source.  A real link() would silently
        # copy on Windows NT and is a fatal error on win98, so this always
        # delegates to my_copy.  Refuses directories, missing sources and
        # existing destinations (reporting each unless $quiet).
        my ($from, $to) = @_;

        return if $from eq $to;
        if (!-e $from) {
            print "  [ LINK: CAN'T FIND $from ]\n" unless $quiet;
            return;
        }
        if (-d $from) {
            print "  [ LINK: CAN'T LINK FROM DIRECTORY ]\n" unless $quiet;
            return;
        }
        if (-e $to) {
            print "  [ LINK: ALREADY EXISTS: $to ]\n" unless $quiet;
            return;
        }
        print "  [ LINK: $from to $to ]\n" if $verbose;
        # link ($from, $to);   # deliberately not used -- see above
        &my_copy ($from, $to);
    }

    sub my_unlink {
        # Delete a plain file; directories are refused with an error
        # message.  A missing file is silently ignored.
        my ($path) = @_;
        if (-d $path) {
            print "  [ ERR: WILL NOT UNLINK DIRECTORY ]\n";
            return;
        }
        return unless -e $path;
        print "  [ ERR: UNLINK $path - $^E ]\n" unless unlink $path;
    }

    sub my_touch {
        # Set a file's access and modification times to "now".
        # A false/empty argument is a no-op.
        my ($path) = @_;
        return unless $path;
        print "  [ TOUCH: $path ]\n" if $verbose;
        my $stamp = time;
        utime $stamp, $stamp, $path;
    }


    sub my_create_empty {
        # Ensure $source exists as a file, creating it empty when absent.
        # Opens in append mode so an existing file's content is preserved.
        my ($source) = @_;
        print "  [ CREATE-EMPTY: $source ]\n" if $verbose;
        # Three-arg open on a lexical handle, checked; the old code used
        # an unchecked two-arg open on the bareword FILE.
        open (my $fh, '>>', $source)
            or print "  [ ERR: CAN'T CREATE $source - $^E ]\n" and return;
        binmode $fh;
        print $fh "";
        close ($fh);
    }

    sub my_copy {
        # Binary-safe copy of $source to $dest that also preserves the
        # source's last-modification time.  Any existing $dest is removed
        # first.  Errors are reported on STDOUT (this file's convention)
        # rather than thrown.
        my ($source, $dest) = @_;
        return if $source eq $dest;
        unless (-e $source) {
            print "  [ COPY: CAN'T FIND $source ]\n";
            return;
        }
        if (-d $source) {
            print "  [ COPY: CAN'T COPY DIRECTORY ]\n";
            return;
        }
        &my_unlink ($dest);
        print "  [ COPY: $source, $dest ]\n" if $verbose;
        # Three-arg opens on lexical handles; bail out on failure instead
        # of continuing with an invalid handle as the old code did.
        open (my $in, '<', $source)
            or print "  [ ERR: CAN'T READ $source - $^E ]\n" and return;
        open (my $out, '>', $dest)
            or print "  [ ERR: CAN'T CREATE $dest - $^E ]\n" and return;
        binmode $in;
        binmode $out;
        {
            local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR
            while (<$in>) {
                print $out $_;
            }
        }
        close ($out);
        close ($in);

        # (adapted from: UserAgent.pm)
        if (my $lm = (stat($source))[9] ) {
            # make sure the copy keeps the original last-modification time
            utime $lm, $lm, $dest;
        }
    }

    sub my_rename {
        # Move $source to $dest.  Any existing $dest is removed first;
        # when rename() fails (e.g. across filesystems) fall back to a
        # copy followed by deleting the original.
        my ($from, $to) = @_;
        return if $from eq $to;
        return unless -e $from;        # silently ignore a missing source
        &my_unlink ($to);
        return if rename $from, $to;
        # rename() failed -- emulate it the slow way
        # print "  [ RENAME: CAN'T RENAME $from $to - $^E ]\n";
        print "  [ RENAME: $from, $to ]\n" if $verbose;
        &my_copy ($from, $to);
        &my_unlink ($from);
    }

    sub make_dir {
        # Create every missing directory leading up to $name (the
        # argument must include the base path).  A trailing "/" means the
        # last component is itself a directory; otherwise the final
        # component is treated as a file name and only its parents are
        # created.  When a plain file occupies a needed directory name it
        # is moved inside the new directory as $INDEXFILE.
        my ($name) = @_;

        return if (-d $name);

        my @parts = split('/', $name);
        push @parts, '' if $name =~ /\/$/;

        # Deepest directory we must ensure exists (everything but the
        # last element), built with a trailing slash.
        my $dir_path = '';
        foreach(0 .. $#parts - 1) {
            $dir_path .= $parts[$_] . '/';
        }

        if (-d $dir_path) {
            print "  [ DIR: $dir_path ok ]\n" if $verbose;
            return;
        }

        my $no_slash = $dir_path;
        $no_slash =~ s/\/$//;
        if  (-e $no_slash) {
                # A file sits where the directory belongs: park it aside,
                # make the directory, then move it back in as the index.
                print "  [ MAKE-DIR: $dir_path is a file ]\n" if $verbose;
                my $temp = $no_slash . $TMP_SUFFIX;
                print "  [ MAKE-DIR: MOVE: $no_slash => $temp ]\n" if $verbose;
                &my_rename ($no_slash, $temp);
                # mode 0777 (let umask trim it); the old code passed the
                # string "-w", which numifies to 0 and produced unusable
                # mode-0000 directories on Unix.
                mkdir $dir_path, 0777;
                my $dest = $no_slash . '/' . $INDEXFILE;
                print "  [ MAKE-DIR: MOVE: $temp => $dest ]\n" if $verbose;
                &my_rename ($temp, $dest);
        }

        # Walk the path from the top, creating each missing component.
        my $prefix = '';
        foreach(0 .. $#parts - 1) {
                $prefix .= $parts[$_] . '/';
                unless (-d $prefix) {
                    print "  [ MAKE-DIR: $prefix ]\n" if $verbose;
                    mkdir $prefix, 0777;    # was "-w" (mode 0) -- see above
                }
        }

    }

    sub modify_file_attrib {
        # Record "$attrib: $value" for $name in its directory's
        # translation file, updating the file's existing entry or
        # appending a new one.
        my ($name,$attrib,$value) = @_;
        my ($filename, $parent, @lines);

        $name =~ s/${PART_SUFFIX}$//;    # remove suffix on log

        ($parent, $filename) = $name =~ /^(.*)\/(.*?)$/;
        print "  [ MODIFY-FILE-ATTRIB: $parent -> $filename ATTRIB: $attrib: $value ]\n" if $verbose;

        # Build the translation-file path BEFORE testing it; the old code
        # tested $trans_filename while it was still undef, so the
        # existence check never worked.
        my $trans_filename = "$parent/$NAME_TRANSLATION_FILE";
        &create_translation_file($parent) unless -e $trans_filename;
        if (open (my $in, '<', $trans_filename)) {
            @lines = <$in>;
            close ($in);
        }

        # Entries look like: <a href=$short>$filename</a> ... <br>
        # grep/foreach alias $_ to the @lines elements, so the s///
        # below edits the stored line in place.
        my $updated = 0;
        foreach( grep { />\Q$filename\E</ or /=\Q$filename\E>/ } @lines ) {
            return if (/ $attrib: $value /);    # already recorded
            s/\s?<br>$/ $attrib: $value <br>/;
            print "  [ MODIFY-FILE-ATTRIB: $_ ]\n" if $verbose;
            $updated = 1;
            last;
        }
        push @lines, "<a href=$filename>$filename</a> $attrib: $value <br>\n"
            unless $updated;

        &make_dir($trans_filename);
        open (my $out, '>', $trans_filename)
            or print "  [ ERR: WRITING $trans_filename - $^E ]\n" and return;
        print $out join('', @lines);
        close ($out);
        print "  [ MODIFY-FILE-ATTRIB: NEW: $lines[-1] ]\n" if $verbose;
    }

    sub get_file_attrib {
        # Look up the stored "$attrib: value" for $name in its
        # directory's translation file.  Returns the value, or '' when
        # the file has no entry or no such attribute.
        my ($name,$attrib) = @_;
        my $value = '';
        my ($filename, $parent, @lines, $trans_filename);

        $name =~ s/${PART_SUFFIX}$//;    # remove suffix on log

        ($parent, $filename) = $name =~ /^(.*)\/(.*?)$/;
        print "  [ GET-FILE-ATTRIB: $parent -> $filename ATTRIB: $attrib: $value ]\n" if $verbose;

        $trans_filename = "$parent/$NAME_TRANSLATION_FILE";
        open (my $fh, '<', $trans_filename) or return '';
        @lines = <$fh>;
        close ($fh);

        # Find this file's entry ("...>$filename<..." or "...=$filename>...").
        foreach( grep { />\Q$filename\E</ or /=\Q$filename\E>/ } @lines ) {
            return $1 if (/ $attrib: (\S*) /);
        }
        return '';
    }

    sub create_translation_file {
        # Seed a directory's translation file with an HTML listing
        # header.  No-op when the file already exists.
        my ($parent) = @_;
        my $trans_filename = "$parent/$NAME_TRANSLATION_FILE";
        return if -e $trans_filename;
        &make_dir($trans_filename);
        my $new_parent = $parent . '/';
        $new_parent =~ s|\/\/|\/|g;    # collapse doubled slashes for display
        my $content = "<HEAD><TITLE>File Listing</TITLE>\n";
        $content .= "</HEAD>\n<BODY>\n";
        $content .= "<H1>Directory listing of $new_parent</H1>\n";
        $content .= "<a href=\"..\">Up to higher level directory</a><br>\n";
        # $content .= "<PRE>";
        # Checked three-arg open; the old code printed an error on a
        # failed open and then wrote to the dead handle anyway.
        open (my $fh, '>', $trans_filename)
            or print "  [ ERR: WRITING $trans_filename - $^E ]\n" and return;
        print $fh $content;
        close ($fh);
    }

    #
    # returns a shorter name if there is one
    #
    sub check_translation_file {
        # Map a long $filename to the short local name stored in the
        # directory's translation file; returns '' when there is none.
        # (The lookup regex had a corrupted "$(unknown)" placeholder
        # where $filename belongs -- restored here.)
        my ($filename, $parent) = @_;
        my ($trans_filename, @lines, $tr_str, $new_name);
        # do we have a $NAME_TRANSLATION_FILE ?
        $trans_filename = "$parent/$NAME_TRANSLATION_FILE";
        if (-s $trans_filename) {
            open (my $fh, '<', $trans_filename) or return '';
            @lines = <$fh>;
            close ($fh);
            # Entries look like: <a href=$new_name>$filename</a><br>
            ($tr_str) = grep { />\Q$filename\E</ } @lines;
            if ($tr_str) {
                ($new_name) = $tr_str =~ /=(.*?)>/;
                return $new_name;
            }
        }
        return '';
    }

    #
    # returns a bigger name if there is one
    #
    sub check_translation_url_node {
        # Reverse lookup: map a short local $filename back to the
        # original long name stored in the directory's translation file;
        # returns '' when there is none.  (The lookup regex had a
        # corrupted "$(unknown)" placeholder where $filename belongs --
        # restored here.)
        my ($filename, $parent) = @_;
        my ($tr_str, $trans_filename, @lines, $new_name);
        # do we have a $NAME_TRANSLATION_FILE ?
        $trans_filename = "$parent/$NAME_TRANSLATION_FILE";
        if (-s $trans_filename) {
            open (my $fh, '<', $trans_filename) or return '';
            @lines = <$fh>;
            close ($fh);
            # Entries look like: <a href=$filename>$long_name</a><br>
            ($tr_str) = grep { /=\Q$filename\E>/ } @lines;
            if ($tr_str) {
                ($new_name) = $tr_str =~ />(.*?)</;
                return $new_name;
            }
        }
        return '';
    }

    #
    # stores a new shorter name
    #
    sub log_translation_file {
        # Append a "$filename -> $new_name" mapping to the directory's
        # translation file, creating the file first when necessary.
        my ($filename, $new_name, $parent) = @_;
        my $trans_filename = "$parent/$NAME_TRANSLATION_FILE";
        &create_translation_file($parent) unless -e $trans_filename;
        # Checked three-arg append; the old code wrote to a bareword
        # handle even after the open had failed.
        open (my $fh, '>>', $trans_filename)
            or print "  [ ERR: WRITING $trans_filename - $^E ]\n" and return;
        print $fh "<a href=$new_name>$filename</a><br>\n";
        close ($fh);
        return;
    }


    1;


    # Main download engine: wraps LWP (and the embedded ftp package) with
    # resume support, sample rotation and translation-file bookkeeping.
    package glynx;

    #require Exporter;
    #@ISA = qw(Exporter);
    #@EXPORT = qw(download);

    our $VERSION = "1.031";

    # parameters to "download" hash
    our $filename;                              # local target file
    our $content_length;                        # expected total size, if known
    our $url;
    our $referer =          ".";
    our $agent =            "Mozilla/3.0 (WinNT; I)";
    our $timeout =          30;                 # seconds, passed to LWP
    our $cookie_file;                           # enables HTTP::Cookies when set
    our $verbose;
    our $quiet;
    our $auth =             '';                 # "user:password" for basic auth
    our $post_separator =     "_X_POST_X_";     # splits URL from POST body
    our $mtime;                                 # enables If-Modified-Since when set
    our $PART_SUFFIX =        "._PART_";
    our $real_name;


    # Shared file-name conventions, mirrored from glynx_file.
    our $NAME_TRANSLATION_FILE = $glynx_file::NAME_TRANSLATION_FILE;
    our $TMP_SUFFIX =        $glynx_file::TMP_SUFFIX;
    our $INDEXFILE =        $glynx_file::INDEXFILE;

    # general control
    our $ua;                                    # shared LWP::UserAgent, built lazily
    our $Boundary;                              # multipart/x-byteranges boundary
    our $num_callback;                          # chunks seen in current download

    # Defaults
    our $DEFAULT_PART_SIZE = 4096 * 4;          # bytes requested per resume range

    sub select_best_sample {
        # Reconcile the partial downloads for a file.  Given either the
        # final name or the ._PART_ name, ensure at most one partial
        # remains: drop both partials when the finished file already
        # exists, otherwise keep the larger of "$file._PART_" and
        # "$file._PART_-1" under the "$file._PART_" name.
        # (Also drops an unused leftover $msg variable.)
        my ($part_filename) = @_;
        my ($filename);
        $filename = $part_filename;
        $filename =~ s/${PART_SUFFIX}$//;    # normalize: strip the suffix
        $part_filename = $filename . $PART_SUFFIX;
        if (-s "$filename" > 0) {
            print "  [ SELECT-SAMPLE: EXISTS: $filename ]\n" if $verbose;
            # finished file already present - discard the partials
            &glynx_file::my_unlink ("$part_filename"); 
            &glynx_file::my_unlink ("${part_filename}-1");
            return;
        }
        if (! (-e "${part_filename}-1")) {
            print "  [ SELECT-SAMPLE: KEEP: ${part_filename}-1 ]\n" if $verbose;
            # no alternative sample exists
            return;
        }
        if (! (-e "$part_filename")) {
            print "  [ SELECT-SAMPLE: KEEP: $part_filename ]\n" if $verbose;
            # only the "-1" sample exists - promote it
            &glynx_file::my_rename ("${part_filename}-1", "$part_filename");
            return;
        }
        # both samples exist - keep whichever is bigger
        if (+(-s "$part_filename") > +(-s "${part_filename}-1")) {
            print "  [ SELECT-SAMPLE: BIGGER: $part_filename ]\n" if $verbose;
            &glynx_file::my_unlink ("${part_filename}-1");
            return;
        }
        # "-1" is at least as big - replace the current sample with it
        print "  [ SELECT-SAMPLE: BIGGER: ${part_filename}-1 ]\n" if $verbose;
        &glynx_file::my_rename ("${part_filename}-1", "$part_filename");
    }

    sub download_callback { 
        # LWP receive-data callback: appends each received chunk to the
        # global $filename.  On the first chunk it inspects the response
        # to decide whether the server honoured our Range request (206),
        # served a multipart/x-byteranges body, or ignored resume
        # (200/other), and records Content-Length in the translation
        # file.  Dies (aborting the request) when the range start does
        # not match the bytes already on disk.
        # Globals used: $filename, $real_name, $num_callback, $Boundary.
        my($data, $response, $protocol) = @_; 
        my ($content_begin, $content_length, $Data_header, $data1);
        $num_callback++;

        # The callback receives: the data read this time, a reference to
        # the response object and a reference to the protocol object.

        # A resumed transfer answers "206 Partial Content" with e.g.:
        # Content-Length: 10000
        # Content-Range: bytes 10329-20328/20329

        print "." if $verbose;

        if ($num_callback == 1) {
            if ($response->code == 200) {
                ($content_length) = $response->header("Content-Length") =~ /(\d+)/;
                print "  [ content_length = $content_length ] \n" if $verbose;
                &glynx_file::modify_file_attrib($filename, 'Content-Length', $content_length) if $content_length;
            }
            if ($response->code == 206) { 
                ($content_begin) = $response->header("Content-Range") =~ /bytes\s+(\d+)-/;

                ($content_length) = $response->header("Content-Range") =~ /\/(\d+)/;
                print "  [ content_length = $content_length ] \n" if $verbose;
                &glynx_file::modify_file_attrib($filename, 'Content-Length', $content_length) if $content_length;

                # the partial file on disk must line up with the range start
                if (-s "$filename" != $content_begin) {
                    if ($response->header("Content-Type") =~ /multipart\/x-byteranges;\s*boundary=(.*)$/) {
                        $Boundary = $1;
                        print "  [ MULTIPART: BOUNDARY = $Boundary ] \n" if $verbose;
                        # Split off the per-part headers and promote them
                        # onto the response object, then re-check the range.
                        ($data1) = $data =~ /\015\012--${Boundary}\015\012(.*)/s;
                        ($Data_header, $data) = $data1 =~ /^(.*?)\015\012\015\012(.*)$/s;
                        print "  [ MULTIPART: DATA-HEADER = $Data_header ] \n" if $verbose;
                        foreach( split("\015\012", $Data_header)) {
                            my ($header, $content) = split (': ', $_, 2);
                            $response->header($header => $content);
                        }

                        ($content_length) = $response->header("Content-Range") =~ /\/(\d+)/;
                        print "  [ content_length = $content_length ] \n" if $verbose;
                        &glynx_file::modify_file_attrib($filename, 'Content-Length', $content_length) if $content_length;

                        print "  [ MULTIPART: DATA: ", length($data), " BYTES ]\n" if $verbose;

                        ($content_begin) = $response->header("Content-Range") =~ /bytes\s+(\d+)-/;
                        if (-s "$filename" != $content_begin) {
                            $response->code(500);
                            die "Wrong range: multipart"; 
                        }
                    }
                    else {
                        $response->code(500);
                        die "Wrong range"; 
                    }
                }
            } else {
                # Server did not accept resume: rotate the sample files
                # (keeping the best attempt as "$filename-1") and restart
                # the download into a fresh empty file.  The rotation
                # target had a corrupted "$(unknown)-1" placeholder here.
                &select_best_sample($filename);
                &glynx_file::my_rename ("$filename", "${filename}-1");
                print "  [ NO-RESUME: Novo request ]\n" if $verbose;
                &glynx_file::my_create_empty ("$filename");
            }
        }

        $data =~ s/\015\012--${Boundary}\015\012(.*)//s if $Boundary;    # strip multipart trailer

        # Append this chunk to the partial file.
        open (my $fh, '>>', $filename) or 
                die "Cannot write to $filename";
        binmode($fh);
        {
            local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR
            print $fh $data;
        }
        close($fh);
    }


    sub timeout {
        # Forward a timeout (in seconds) to the shared LWP::UserAgent.
        my ($seconds) = @_;
        $ua->timeout($seconds);
    }

    # download -- fetch one URL into a local file, with resume support.
    #
    # Arguments arrive as a flat key/value list (e.g. url => ..., filename => ...)
    # and are copied into same-named package globals ($url, $filename, $referer,
    # $auth, $mtime, $agent, $timeout, ...) by the string-eval loop below.
    # Handles: partial-content resume via Range headers, POST bodies embedded in
    # the URL (split on $post_separator), ftp:// URLs through the bundled
    # ftp::request, and an https fallback via Win32::Internet on Windows.
    # Returns the HTTP::Response object ($res); data itself is written by
    # &download_callback (defined elsewhere in this file).
    sub download {
        my ($res, $file_size, $INET, $data);
        my ($content, $content_begin, $content_end, $content_range, $content_size, $content_difference);

        # $content_length = &glynx_file::get_file_attrib($filename, 'Content-Length');

        # Copy each named argument into the like-named package variable.
        # NOTE(review): this string-eval breaks if a value contains a single
        # quote and will execute arbitrary code from argument values --
        # candidate for replacement with explicit assignments.
        my %cnf = @_;
        for (keys %cnf) {
            eval ("\$" . $_ . " = '" . $cnf{$_} . "'");
        }

        # Lazily build the shared UserAgent on first call; reused thereafter.
        unless ($ua) {
            $ua = LWP::UserAgent->new;
            $ua->agent($agent);
            $ua->timeout($timeout);
            $ua->env_proxy();
            $ua->cookie_jar(HTTP::Cookies->new(file => $cookie_file, autosave => 1)) if $cookie_file;
        }
        $real_name = $filename unless ($real_name);

        # GET:
        print "  [ GET: $url ]\n" unless $quiet;
        my $req = HTTP::Request->new(GET => $url);
        $req->referer($referer . '');
        # declare preference for "html" directory listings, if "ftp"
        $req->header('Accept' => 'text/html;q=1.0,*/*;q=0.6');
        $req->authorization_basic(split (/:/, $auth),2) if $auth;
        # Conditional GET when we already know the file's modification time.
        if ($mtime) {
            print "  [ If-Modified-Since: ", HTTP::Date::time2str($mtime), " ]\n" if $verbose;
            $req->header('If-Modified-Since' => HTTP::Date::time2str($mtime));
        }
        # $download_success = 1;
        $Boundary = '';
        # RESUME:
        # The first-byte-pos value in a byte-range-spec gives the byte-offset
        # of the first byte in a range. The last-byte-pos value gives the
        # byte-offset of the last byte in the range; that is, the byte
        # positions specified are inclusive. Byte offsets start at zero.
        # Range: bytes=9500-

            # Pick the best partial-download sample, then request only the
            # bytes we are still missing (no Range header if file is empty).
            &select_best_sample($filename);
            $file_size = 0 + (-s "$filename");
            $num_callback = 0;
            print "  [ RESUME: from byte $file_size ]\n" if $verbose;
            $content_length = &glynx_file::get_file_attrib($filename, 'Content-Length');
            # No stored Content-Length: ask for one more "part" past what we have.
            $content_length = $file_size + $DEFAULT_PART_SIZE unless $content_length;
            $req->push_header("Range" => "bytes=$file_size-$content_length") if $file_size;
            print "  [ REQUEST = ", $req->as_string, " ] \n" if $verbose;
            print "  [ REQUEST: PROXY = ", $ua->proxy, " ] \n" if $verbose;

            if ($url =~ /ftp:\/\//i) {
                # ftp goes through the bundled ftp.pm (supports REST/resume).
                $res = ftp::request($req, $timeout, \&download_callback);
            }
            else {
                # URL of the form "real-url<sep>body" means a POST request;
                # split it and move the trailing part into the request body.
                if ($url =~ /(.*?)$post_separator(.*?)/) {
                    $content = $2;
                    $req->uri($1);
                    $req->content($content);
                    $req->method("POST");
                    $req->push_header("Content-Length" => length($content));
                    $req->content_type('application/x-www-form-urlencoded');
                    print "  [ POST: ", $req->as_string(), " ]\n" if $verbose;
                }
                # $res = http::request($req, $ua->proxy, \&download_callback, 65536, $TIMEOUT);
                # 65536 is the callback chunk size.
                $res = $ua->request($req, \&download_callback, 65536);    # 65536); 
            }

            # try to fix https
            # 501 on Windows for an https URL: LWP has no SSL support here,
            # so fall back to the native WinInet API (GET only, no redirects).
            if (($res->code == 501) and ($^O =~ /win32/i) and ($url =~ /https:\/\//i)) { 
                my $proxy    = $ua->proxy;        # format ?
                my $agent    = $ua->agent;
                my $opentype;
                # String-eval so the constants are only needed when Win32::Internet loaded.
                eval " \$opentype = \$proxy ? INTERNET_OPEN_TYPE_PROXY : INTERNET_OPEN_TYPE_DIRECT; ";
                #                           [useragent, opentype, proxy, proxybypass, flags]
                $INET = new Win32::Internet(); # ($agent, $opentype, $proxy, '', INTERNET_FLAG_ASYNC);
                print "  [ HTTPS: WIN32: $url -- $filename ]\n" if $verbose;
                $data = $INET->FetchURL($url);
                # print "  [ HTTPS: WIN32: $data ]\n" if $verbose;
                if ($data) { 
                    $res->code(200);
                    # NOTE(review): 2-arg open with bareword handle; prefer
                    # 3-arg open with a lexical filehandle.
                    open(FILE, ">$filename") or die "Cannot write to $filename";
                    binmode(FILE);
                    local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR
                    print FILE $data;
                    close(FILE);
                    goto DOWNLOAD_OK;    # can't do redirects, etc. 
                }
            }

            # X-Died: the callback aborted mid-transfer.
            if ($res->header("X-Died")) {
                # rotate the retry sample files - later the best one should be chosen
                &select_best_sample($filename);
            } 
            # - look at the 3rd parameter on "206" 
            # (when available -- otherwise it may be 500 Timeout),
            # Content-Length: 637055 --> if "206" this is "chunk" size
            # Content-Range: bytes 1449076-2086130/2086131 --> THIS is file size
            $content_range = $res->header("Content-Range");
            if (($res->code == 206) and $content_range) {
                ($content_begin, $content_end, $content_size) = $content_range =~ /bytes\s+(\d+)-(\d+)\/(\d+)/;
                $file_size = -s "$filename";
                $content_difference = $content_size - $file_size;
                if ($content_difference > 0) { 
                    # Still incomplete: leave code 206 so caller retries.
                    # $download_success = 0;    # not ready yet
                    print "  [ CONTENT: MISSING: $content_difference/$content_size BYTES ] \n" if $verbose;
                }
                else {
                    # Whole file present: promote 206 to 200 (download complete).
                    print "  [ CONTENT: OK: $file_size/$content_size BYTES ] \n" if $verbose;
                    $res->code(200);
                }
            }
    DOWNLOAD_OK:
        # arriving here from FILE: (cache) or from HTTP:
        return $res;
    } # end: download

    1;




=head1 NAME

    Glynx - a download manager. 

=head1 DESCRIPTION

    Glynx makes a local image of a selected part of the internet.

    It currently supports resume/retry, referer, user-agent, frames, distributed
    download (see C<--slave>, C<--stop>, C<--restart>).

    It partially supports: redirect (using file-copy), java,
    javascript, multimedia, authentication (only basic), 
    mirror, translating links to local computer (C<--makerel>),
    correcting file extensions,
    ftp, renaming too long filenames and too deep directories,
    cookies, proxy, forms, multipart responses.

    A http user interface is included.

    https: works only in Windows, with GET method. It should work
    in other OS and with other methods if openssl is installed.

    It can be used together with other download managers, making
    a distributed download process.

=head1 SYNOPSIS

=over

=item Work with the http interface:

     glynx.pl

     - then type in the given address in your browser.

     - you will need other instances of the program (at least one) working
     as slaves. Just double-click the program again to open another instance, or
     use:

     glynx.pl --slave


=item Command-line, do-everything at once:

     glynx.pl [options] <URL>


=item Save work to finish later: 

     glynx.pl [options] --dump="download-list-file" <URL>


=item Finish saved download:

     glynx.pl [options] "download-list-file"

=item Network mode (server/client/slave)

=item - Clients: 

     glynx.pl [options] --dump="download-list-file" <URL>

        or:

     glynx.pl --server [--port=8081]
        Starts a client-interface http server


=item - Slaves (will wait until there is something to do): 

     glynx.pl [options] --slave

=back

=head1 HINTS

    If you don't use command line options, the first program instance will be an http server.
    Remaining instances will be slaves ("executors").

    How to create a default configuration:

        Start the program with all command-line configurations, plus --cfg-save
        or:
        1 - start the program with --cfg-save
        2 - edit glynx.ini file

    --subst, --exclude and --loop use regular expressions.

       http://www.site.com/old.htm --subst=s/old/new/
       downloads: http://www.site.com/new.htm

       - Note: the substitution string MUST be made of "valid URL" characters

       --exclude=/\.gif/
       will not download ".gif" files

       - Note: Multiple --exclude are allowed:

       --exclude=/gif/  --exclude=/jpeg/
       will not download ".gif" or ".jpeg" files

       It can also be written as:
       --exclude=/\.gif|\.jp.?g/i
       matching .gif, .GIF, .jpg, .jpeg, .JPG, .JPEG

       --exclude=/www\.site\.com/
       will not download links containing the site name

       http://www.site.com/bin/index.htm --prefix=http://www.site.com/bin/
       won't download outside from directory "/bin". Prefix must end with a slash "/".

       http://www.site.com/index%%%.htm --loop=%%%:0..3
       will download:
         http://www.site.com/index0.htm
         http://www.site.com/index1.htm
         http://www.site.com/index2.htm
         http://www.site.com/index3.htm

       - Note: the substitution string MUST be made of "valid URL" characters

    - For multiple exclusion: use "|".

    - Don't read directory-index:

        ?D=D ?D=A ?S=D ?S=A ?M=D ?M=A ?N=D ?N=A =>  \?[DSMN]=[AD] 

        To change default "exclude" pattern - put it in the configuration file

    Note: "File:" item in dump file is ignored

    You can filter the processing of a dump file using --prefix, --exclude, --subst

    If after finishing downloading you still have ".PART._BUSY_" files in the 
    base directory, rename them to ".PART" (the program should do this by itself)

    Don't do this: --depth=1 --out-depth=3 because "out-depth" is an upper limit; it 
    is tested after depth is generated. The right way is: --depth=4 --out-depth=3

    This will do nothing:

     --dump=x graphic.gif

    because the dump file gets all binary files.

Errors using https:

     [ ERROR 501 Protocol scheme 'https' is not supported => LATER ] or
     [ ERROR 501 Can't locate object method "new" via package "LWP::Protocol::https" => LATER ]

    This means you need to install at least "openssl" (http://www.openssl.org), Net::SSLeay and IO::Socket::SSL


=head1 COPYRIGHT

    Copyright (c) 2000 Flavio Glock <fglock@pucrs.br>. All rights reserved.
    This program is free software; you can redistribute it and/or
    modify it under the same terms as Perl itself.
    This program was based on examples in the Perl distribution.


    If you use it/like it, send a postcard to the author. 



=head1 COMMAND-LINE OPTIONS

Very basic:
  --version         Print version number (1.031.003) and quit
  --verbose         More output
  --quiet           No output
  --help            This page
  --cfg-save        Save configuration to file "glynx.ini"
  --base-dir=DIR    Place to load/save files (default is "k:/download/download/")
Development only:
  --make-cpan       Preprocess files to make Glynx distribution
Download options are:
  --sleep=SECS      Sleep between gets, ie. go slowly (default is 1)
  --prefix=PREFIX   Limit URLs to those which begin with PREFIX (default is URL)
                    Multiple --prefix are allowed
  --depth=N         Maximum depth to traverse (default is 0)
  --out-depth=N     Maximum depth to traverse outside of PREFIX (default is 0)
  --referer=URI     Set initial referer header (default is ".")
  --limit=N         A limit on the number documents to get (default is 10000)
  --retry=N         Maximum number of retrys (default is 5)
  --timeout=SECS    Timeout value - increases on retrys (default is 30)
  --agent=AGENT     User agent name (default is "Mozilla/3.0 (WinNT; I)")
  --mirror          Checks all existing files for updates (default is --nomirror)
  --mediaext        Creates a file link, guessing the media type extension (.jpg, .gif)
                    (Windows perl makes a file copy) (default is --nomediaext)
  --makerel         Make Relative links. Links in pages will work in the
                    local computer.
  --auth=USER:PASS  Set authentication credentials
  --cookies=FILE    Set up a cookies file (default is no cookies)
  --name-len-max    Limit filename size (default is 30)
  --dir-depth-max   Limit directory depth (default is 8)
Multi-process control:
  --slave           Wait until a download-list file is created (be a slave)
  --server          Be an http user-interface server, with slave processes
  --children        How many slaves will this process spawn (default is "0")
  --port=N          Http server TCP/IP port (default is "8081")
  --stop            Stop slave
  --restart         Stop and restart slave
Other:
  --indexfile=FILE  Index file in a directory (default is "_INDEX_.HTM")
  --part-suffix=.SUFFIX (default is "._PART_") (example: ".Getright" ".PART")
  --dump=FILE       (default is "") make download-list file, 
                    to be used later
  --dump-max=N      (default is 100) number of links per download-list file
  --invalid-char=C  (default is "$")
  --exclude=/REGEXP/x (default is "") Don't download matching URLs
                    Multiple --exclude are allowed
  --loop=REGEXP:INITIAL..FINAL (default is "") (eg: xx:a,b,c  xx:'01'..'10')
  --subst=s/REGEXP/VALUE/x (default is "") ("\" must be written "\\")
  --404-retry       will retry on error 404 Not Found (default). 
  --no404-retry     creates an empty file on error 404 Not Found.

=head1 README

    Glynx - a download manager (robot) 

    INSTALLATION

      WINDOWS:

        - Copy the script to your download directory, such as c:\glynx or c:\temp
          Do not use it on c:\ because it will create files there.

        - It can be used as command prompt script, 
          or you can double click it to start it as a server or slave.

          You can open the program more times, if you have enough memory.
          If the programs are started from the same directory, they will work together,
          even if they are on different computers.

        - As a server, you can access it from your browser when you see the message:

            User interface server running at http://localhost:8081/
 
        - The latest ActivePerl has all the modules needed.

      UNIX/LINUX:

        - Please note that the software will create MANY files in 
          its work directory, so it is advisable to have a dedicated 
          sub-directory for it.

        - chmod +x glynx.pl                       (if necessary)
          pod2html glynx.pl -outfile=glynx.htm      (this is optional)

        - under RedHat 6.2 I had to upgrade or install these modules:
          HTML::Tagset MIME::Base64 URI HTML::Parser Digest::MD5 libnet
          libwww-perl

        - to use https you will need:
          openssl (www.openssl.org) Net::SSLeay IO::Socket::SSL


    How do I run Glynx?

    Before you run Glynx, you should be connected to
    the Internet. 

    Glynx can then be run by typing 'perl glynx.pl [options] http://...' 
    in a command prompt or terminal window.

    It can also be run with 'perl glynx.pl [options] --server'
    and then accessed by an internet browser.


    How to create a default configuration:

        Start the program with your command-line options, plus --cfg-save
        or:
         Start the program with --cfg-save, then edit glynx.ini file.


    Proxy, Firewalls

        1.Add the setting HTTP_proxy, with your proxy name as the
        value (you must include "http://" ), followed by a colon
        and the proxy port, if applicable; e.g., "http://proxy:8080" 

        2.If you require a user name and/or password to access your
        proxy, add the settings HTTP_proxy_user and
        HTTP_proxy_pass, with your user name and password as the
        respective values. 

        It is also possible to use an FTP proxy. See the Net::FTP
        documentation.


    COPYRIGHT

    Copyright (c) 2000 Flavio Glock <fglock@pucrs.br>. All rights reserved.
    This program is free software; you can redistribute it and/or
    modify it under the same terms as Perl itself.
    This program was based on examples in the Perl distribution.

    -----------


=pod SCRIPT CATEGORIES

Web
Networking
CGI

=pod OSNAMES

any

=head1 TO-DO

    Glynx - a download manager. 

    OTHER PROGRAMS LIKE THIS

        http://www.w3.org/Robot/

        http://langfeldt.net/w3mir/

        A robot site: http://www.botspot.com/

        The robot exclusion standard is described in
        http://info.webcrawler.com/mak/projects/robots/norobots.html. 

    SHORT TO-DO

        - More command-line compatibility with lwp-rget
        - Complete the user interface


    GOALS

        generalize:
        option to use (external) java and other script languages to extract links
        configurable file names and suffixes, filesystem limits
        configurable dump file format
        plugins
        more protocols; download streams
        language support

        adhere to perl standards: pod documentation, distribution

        parallelize things and multiple computer support

        cpu and memory optimizations

        accept hardware/internet failures: restartable

        reduce internet traffic: minimize requests, cache everything

        from perlhack.pod:
         Keep it fast, simple, and useful.
        Keep features/concepts as orthogonal as possible (what's orthogonal?).
        No arbitrary limits (platforms, data sizes, cultures).
        Keep it open and exciting to use/patch/advocate Perl everywhere.
        Either assimilate new technologies, or build bridges to them.


    PROBLEMS (not bugs)

        - It takes some time to start the program; not practical for small single file downloads.

        - It should have a graphical front-end; there exists a web front-end.

        - Hard to install if you don't have Perl or have outdated Perl modules. It works fine
          with Perl 5.6 modules.

        - slave mode uses "dump files", and doesn't delete them.

        - children processes are silent - don't have consoles

        - doesn't recognize if a site does not accept simultaneous connections

        - possibly requires Perl 5.6 -- 5.5 doesn't like "our"
        is this a bug?

    TESTS

        - test: counting MAX_DOCS with retry
        - test: out-depth
        - test: redirect 3xx
        - test: makerel
        - test: makerel with javascript/java
        - test: env_ftp
        - test: unknown protocol is a fatal error (on page links)
        - test: folded directories
        - test: escaped save/compare for all URL names


    BUGS

        - translation file title shows local directory name, instead of url

        - ftp:
            400 (Bad Request) FTP return code 350
            Content-Type: text/plain
            Restarting at 0.

        - 030: loop doesn't expand (user-interface)
        - 030: User interface doesn't know base-dir

        - breaks in multiprocess mode (children ne 0)

        - if 2 processes get the same dump-file it is deleted.

        - restart should check if program file-size is stable before restarting.
        it may be being updated through a slow ftp session

        - testing busy_files for expiration uses file age when script started (-M)
        should use age NOW.
        - busy-dump-files expire in 12h -- should be configurable

        - modify_file_attrib doesn't make a backup before writing, or locking -- may lose contents.

        - sometimes an empty packet is received, truncating the communication.

        - content_base MAY be wrong if query contains / in line and we receive a redirect:
            $path =~    s|^(.*)\/||g;   # remove directory

        - <form name=aaa> sends aaa='' in the query

        - log user-interface ip number doesn't work

        - modify ftp.pm to return "file/link" information -- save "dir" as _index_.htm
        - download leaks to "/" instead of base-dir

        - restart/stop must rename .grx._BUSY_ => .grx
        - slave should spawn if depth > 0 AND filetype = html; 
        - test if dump-file exists - don't overwrite .err
        - control whether a slave can dump dump-files
        They could dump after processing all depth>0, AND only if there were any.

        - problems downloading java
        referer:  http://www.bera.org/
        class:    http://www.bera.org/java/JambaAnimator.trolley_anim.class

        - don't show "RENAME: CAN'T FIND" and "URL:" , unless VERBOSE

        - name-list shows both:
            index.html?option=e&topic=alimentos
            index.html$option=e&top6804
        after reprocessing with local links

    OPTIMIZATIONS

        - cache the name-list; cache dir-names

        - use an optional database connection

        - Persistent connections;
        - take a look at LWP::ParallelUserAgent
        - take a look at LWPng for simultaneous file transfers
        - take a look at LWP::Sitemapper

        - use eval around things to speed up program loading

         - speed up search using stacks indexed per directory or per site


    DOCUMENTATION


        - make a --install (extract included files, pod).

        - make-cpan should use binmode

        - document the short command-line options

        - FTP proxy


    USER INTERFACE

        - Dr. Watson when user asks for a stop -- will let it disabled
        - Dr. Watson not very happy with "restart" too
        - "stop" in httpd doesn't work

        - Linux core-dump when accessing http-server

        - user-interface has NO security
        - user authentication, if user-ip not equal server-ip
        - should support some authentication. Maybe use --auth=

        - user-interface does almost no test for valid field contents

        - what happens when choose base-dir in user interface?
        - probably should not be possible

        - how to save-config in user interface?

        - read lynx help, try to make compatible commands

        - status page

        - log file: missing time/date and user ip number

        - how to do user-answered forms? (POST)
        - maybe an http-client with push
        - http-client could use http-server as a "proxy"

        - rename "old" .grx._BUSY_ files to .grx (timeout = 1 day?)
          option: touch busy file to show activity

        - scripting option (execute sequentially instead of parallel).
        POST with interactive mode or from-file

        - perl/tk front-end; finish web front end

         - save "to-do" file each 10 minutes, so it can restart.

        - timed downloads - start/stop hours

         - option portuguese/english/other

        - accept --url=http://...
        - accept --batch=...grx

        - arrays for $LOOP,$SUBST; accept multiple URL

        - makerel: make relative links to OTHER sites should be an option
        - makerel: should work on applets.

        - put / / on exclude, etc if they don't have

        - graphical-interface: option iso9660

        - option compress-extension:  .tar.gz -> .TGZ (for iso9660)
        - extension .ab---z -> .ABZ

        - _names_.htm should point to ../_names_.htm ("Up to higher level directory")
        and to subdir/_names_.htm; header = "Directory listing of ... "
        - directories should be of type "DIR"
        - better formatted name-list, as in ftp-dir

        - make a logo
        - include all options, help, in graphical interface
        - graphical interface easier to configure
        - stop-task in cgi (--restart + delete grx file)


    PROTOCOL

        - "Los Alamos Web Server (Unix)"
            - looks like it is sending empty packets, which the LWP interprets as end
            of transmission (closes connection).
            - HTTP/1.0 403 Forbidden
                403 BOGUS
                query range malformed
                Bad Request
                - probably caused by range 9999- (missing end-byte of range)

        - use HTTP::Status for message codes

        - pass "VERBOSE" to ftp.pm

        - Fatal: https:// in URI.pm

        - 301/302: Moved -- should save an intermediate file to keep
            links working locally

        - use robot-rules

        - create variable max-link-len (now is 500 bytes)

        - improve forms support (read rfc...)
        - do not press 2 "submits" at the same time; do not press TYPE=RESET
        - explore "options"

        - ignore/accept comments: <! a href="..."> - nested comments???
        but accept javascript

        - should we read vbasic too?

         - check: 19.4.5 HTTP Header Fields in Multipart Body-Parts
            Content-Encoding
            Persistent connections: Connection-header
            Accept: */*, *.*

         - pnm protocol: - realvideo, .rpm files, rtsp: -- RFC 2326

         - streams

         - gnutella

         - 401 Authentication Required, generalize abort-on-error list

        - install and test "https"; do a how-to.

        - 401 - auth required -- supply name:pass

        - implement "If-Range:"

        - better error handling on protocol error, for page links;
          wrong link "c:\xxx" is a fatal error

        - make auth-digest

        - AUTH should always send nnn:ppp@url for auth-basic (always...)

        - ftp_proxy
        - --proxy option, overriding env_proxy

        The LWP::Simple interface will call env_proxy() for you automatically.
        Applications that use the $ua->env_proxy() method will normally not use the
        $ua->proxy() and $ua->no_proxy() methods.


    PERL

        - make it a Perl module (crawler, robot?), generic, re-usable.
        - maybe a "LWP::Restartable"

        - funny Win-NT error "can't find" something:
          "The system cannot find the file specified" - active perl installation error

        - javascript interpreter option


    OTHER

        - file locking
        - send some html requests to slave processes - will require some 
        additional controls.

        - does Location / Redirect count as a depth level ?

        - simultaneous download from mirror sites
        - use ftpsearch, others, to find mirror sites.

        - name-list for other sites is creating too many empty directories.
        empty-directories should be created only when necessary, and file names
        should be stored somewhere else until the directories are created.
        - should be change file-name after download (in case of mime-type mistakes)

        - "Are we reprocessing the cache?" should trigger a filter to remove all /_INDEX_.HTM

        - should make backup when mirroring (option)

        - finish "my_link"
        - perl "link" is copying instead of linking, even on linux

        - use the name-lookup table to make up for links/redirects

        - lwp-rget "depth" is "0" when we use "1"

        - Doesn't recreate unix links on "ftp". 
        Should do that instead of duplicating files (same on http redirects).
        - http server to make distributed downloads across the internet
        - use eval to avoid fatal errors; test for valid protocols

        - don't ignore "File:" on dump-file
        - execute/override download-list-file "File:"
          option: use --subst=/k:\\temp/c:\\download/

        - change the retry loop to a "while"


    Generalization, user-interface: 

        - --log-headers should be an option

        - option to understand robot-rules

        - make .glynx the default suffix for everything

        - try to support <form> through download-list-file

        - internal small javascript interpreter

        - config comment-string in download-list-file
        - config comment/uncomment for directives
         - arquivo default para dump sem parametros - "dump-[host]-1"?
        make backup on overwrite dump
        - plugins: for each chunk, page, link, new site, level change, dump file change, 
                max files, on errors, retry level change. Opcao: usar callbacks, ou
          fazer um modulo especializavel.
        - dump suffix option
        - use environment
         - aceitar configuracao --nofollow="shtml" e --follow="xxx"
         - controle de hora, bytes por segundo

        - packing for distribution, include rfcs, etc?

        - installation hints, package version problems (abs_url)

         - make an object for link-lists - escolher e especializar um existente.

         - documentar melhor o uso de "\" em exclude e subst
    
         - Renomear a extensao de acordo com o mime-type (ou copiar para o outro nome).
            --on-redirect=rename 
               --on-redirect=copy
               --on-redirect=link
            --on-mime=...

         - tamanho maximo do arquivo recebido
        - usar: $ua->max_size([$bytes]) - nao funciona com callback

         - disk full or unaccessible / alternate dir

         - montar links usando java ?

         - a biblioteca LWP faz sozinha Redirection 3xx ?

         - criar arquivo PART com tamanho zero quando da erro 408 - timeout


    COMMAND LINE OPTIONS

        - new options in user interface: 
            - form field/value (translate to SUBST)

         - "--proxy=http:"1.1.1.1",ftp:"1.1.1.1"
            "--proxy="1.1.1.1"
              acessar proxy: $ua->proxy(...) Set/retrieve proxy URL for a scheme: 
              $ua->proxy(['http', 'ftp'], 'http://proxy.sn.no:8001/');
              $ua->proxy('gopher', 'http://proxy.sn.no:8001/');

        - accept empty "--dump" or "--nodump"

        --backup / --nobackup
            when mirroring, overwriting dump, or reprocessing links.

         --max-mb=100
             limita o tamanho total do download

         --nospace
             permite links com espacos no nome (ver lwp-rget)

         --include=".exe" --nofollow=".shtml" --follow=".htm"
             opcoes de inclusao de arquivos (procurar links dentro)

        --exclude-hreftext= --exclude-buttontext
            exclude a link by the "href" or "submit" text 

         --full ou --depth=full
             opcao site inteiro

         --chunk=128000

        --dump-all
            grava todos os links, incluindo os ja existentes e paginas processadas

        --post-separator

      Not implemented, but won't generate fatal errors:
      --hier            Download into hierarchy (not all files into cwd)
      --iis             Workaround IIS 2.0 bug by sending "Accept: */*" MIME
                        header; translates backslashes (\) to forward slashes (/)
      --keepext=type    Keep file extension for MIME types (comma-separated list)
      --nospace         Translate spaces URLs (not #fragments) to underscores (_)
      --tolower         Translate all URLs to lowercase (useful with IIS servers)

    ------------------

=head1 CHANGES

    Glynx - a download manager. 

    Version history:

     1.031.003
        - file translations does not store repeated short names
        - split into glynx_file.pm

     1.031
        - split into glynx.pm
        - correct "port 21" reprocessing, using quotemeta
        - does not request partial file if initial byte is zero
        - preprocess options before inserting dump-file urls
        - corrected reprocessing of "url/.."

     1.030
        - default children is zero
        - better --make-cpan
        - will go to slave mode if server cannot start
        - sleep between spawns
        - compiled with strict
        - $DEFAULT_PART_SIZE is 16k
        - busy-dump-files expire in 12h
        - makes error-dump-file on last retry
        - stores Content-Length to use in Range requests.
        - makes unique dump filenames
        - timeout has a maximum
        - supports secure (https) GET on windows, even without openssl.

     1.029
        - accepts simple multipart responses (8bit-byte encoded, single file)
        - http://host/path/ prefix was http://host in user-interface
        - corrected user-interface options, help, more/less options
        - removed file eg/glynx_httpd.pl
        - corrected slave dumper; dump-file options

     1.028
        - better interface
        - corrected interface dump-file
        - will fork slaves, unless --children=0
        - if there is no url in command line, will be server
        - if there is a url on command line, will not spawn slaves
        - doesn't try to link files anymore (win98 fatal)
        - http server has a tick time
        - better query splitter
        - make-cpan uses binmode
        - read/save auth
        - tested: cookies
        - codebase .= '/'
        - user interface calculates prefix

     1.027
        --server, --port = http server
        - takes action on 301 and 302 - Moved ...
        - "Location" base is url, instead of referer
        - calls equery on make_filename
        - some hacks to glue parts together and make one big file for CPAN (--make-cpan)
        - includes ftp.pm in the same file.
        - POST sends Content-Length

     1.026:
        - simple GET and PUT forms
        - reject link "c:\"
        - bigger max-link-len (500 bytes)
        - slave intervals on SLEEP if it is active; otherwise on TIMEOUT
        - ftp-dir sends content-location
        - finds ftp.pm in program's directory
        - better make-dir
        - escapes single-quotes reading config-file
        - corrected: didn't start if had --prefix
        - resume ftp transfers. Needs the custom ftp.pm module.
        - base-dir is always absolute
        - makerel: default is don't make backup
        - verify each subdirectory for transformations
        - sites with ports translate back correctly to site:port
        - make_shorter_name receives untransformed url also
        - separated pod file
        - saves Content-Type
        - can save any file attribute (delimiter is space)
        - reprocessing the cache is slower, due to relative links reconstruction
        - might create directories for linked sites, if it is necessary to create reference files

     1.025:
        - correction (again...) in slave mode variables save/restore
        - included simple web front-end in eg directory

     1.024:
        new options:
          --name-len-max=   Limit filename size
          --dir-depth-max=  Limit directory depth
        --cookies=FILE
        --auth=
        --makerel         Make relative links

        - makerel will make relative links to other sites; 
        will process last depth; 
        save modified page and make a backup of the original page.
        - better error handling on command line url "protocol error"
        - use env_proxy
        - my_link started
        - $RETRY_TIMEOUT_MULTIPLIER set to 2


     1.023:
        - better redirect, but perl "link" is copying instead of linking
        - --mirror option (304)
        - --mediaext option
        - sets file dates to last-modified


     1.022:
        - multiple --prefix and --exclude seems to be working
        - uses Accept:text/html to ask for an html listing of the directory when in "ftp" mode.
        - corrected errors creating directory and copying file on linux


     1.021:
        - uses URI::Heuristic on command-line URL
        - shows error response headers (if verbose)
        - look at the 3rd parameter on 206 (when available -- otherwise it gives 500),
                Content-Length: 637055        --> if "206" this is "chunk" size
                Content-Range: bytes 1449076-2086130/2086131 --> THIS is file size
        - prefix of: http://rd.yahoo.com/footer/?http://travel.yahoo.com/
            should be: http://rd.yahoo.com/footer/
        - included: "wav"
        - sleep had 1 extra second
        - sleep makes tests even when sleep==0


     1.020: oct-02-2000
        - optimization: accepts 200, when expecting 206
        - don't keep retrying when there is nothing to do
        - 404 Not Found error sometimes means "can't connect" - uses "--404-retry"
        - file read = binmode


     1.019: - restart if program was modified (-M $0)
        - include "mov"
        - stop, restart


     1.018: - better copy, rename and unlink
        - corrected binary dump when slave
        - comparacao de tamanho de arquivos corrigida
         - span e' um comando de css, que funciona como "a" (a href == span href);
          span class is not java


     1.017: - sleep prints dots if verbose.
        - daemon mode (--slave)
        - url and input file are optional


     1.016: sept-27-2000
        - new name "glynx.pl"
        - verbose/quiet
        - exponential timeout on retry
        - storage control is a bit more efficient
        - you can filter the processing of a dump file using prefix, exclude, subst
        - more things in english, lots of new "to-do"; "goals" section
        - rename config file to glynx.ini


     1.015: - first published version, under name "get.pl"
        - rotina unica de push/shift sem repeticao
        - traduzido parcialmente para ingles, revisao das mensagens


     1.014: - verifica inside antes de incluir o link
         - corrige numeracao dos arquivos dump
         - header "Location", "Content-Base"
        - revisado "Content-Location"


     1.013: - para otimizar: retirar repeticoes dentro da pagina
        - incluido "png"
        - cria/testa arquivo "not-found"
        - processa Content-Location - TESTAR - achar um site que use
        - incluido tipo "swf", "dcr" (shockwave) e "css" (style sheet)
         - corrige http://host/../file gravado em ./host/../file => ./file
         - retira caracteres estranhos vindos do javascript: ' ;
        - os retrys pendentes sao gravados somente no final.
        - (1) le opcoes, (2) le configuracao, (3) le opcoes de novo


     1.012: - segmenta o arquivo dump durante o processamento, permitindo iniciar o
        download em paralelo a partir de outro processo/computador antes que a tarefa esteja
        totalmente terminada
        - utiliza indice para gravar o dump; nao destroi a lista que esta na memoria.
        - salva a configuracao completa junto com o dump; 
        - salva/le get.ini


     1.011: corrige autenticacao (prefix)
        corrige dump
        le dump
        salva/le $OUT_DEPTH, depth (individual), prefix no arquivo dump


     1.010: resume
        se o site nao tem resume, tenta de novo e escolhe o melhor resultado (ideia do Silvio)


     1.009: 404 not found nao enviado para o dump
           processa arquivo se o tipo mime for text/html (nao funciona para o cache)
           muda o referer dos links dependendo da base da resposta (redirect)
           considera arquivos de tamanho zero como "nao no cache"
           gera nome _INDEX_.HTM quando o final da URL tem "/". 


     1.008: trabalha internamente com URL absolutas
           corrige vazamento quando out-nivel=0


     1.007: segmenta o arquivo dump 
           acelera a procura em @processed
           corrige o nome do diretorio no arquivo dump

    -----------------


=head1 ACCESSORY SCRIPTS

These are some scripts that might help make Glynx more user-friendly.

    Glynx - a download manager. 

    ACCESSORIES

    glynx_slave.pl

        A shortcut for starting glynx in slave mode, to
        execute jobs.
        Configuration:
            $base_dir -- where glynx looks for jobs.

    glynx.cgi

        User interface, installable in a web-server.

    glynx_menu.pl

        Subroutines used by glynx.cgi, very basic.
        Configuration:
            $base_dir -- where glynx looks for jobs;
            $log_dir  -- where we write the log file.


    COPYRIGHT

    Copyright (c) 2000 Flavio Glock <fglock@pucrs.br>. All rights reserved.
    This program is free software; you can redistribute it and/or
    modify it under the same terms as Perl itself.
    This program was based on examples in the Perl distribution.

    -----------


=head2 glynx.cgi - a CGI wrapper to run the user-interface

    #!/usr/bin/perl
    # Copyright (c) 2000 Flavio Glock <fglock@pucrs.br>. All rights reserved.
    # This program is free software; you can redistribute it and/or
    # modify it under the same terms as Perl itself.
    #
    # This program was based on examples in the Perl distribution,
    # mainly from Gisle Aas.
    # 
    # If you use it/like it, send a postcard to the author. 
    # Find the latest version in CPAN or http://www.pucrs.br/flavio

    use CGI qw/:standard/;
    require "./glynx_menu.pl";
    &glynx_configure;
    $query = new CGI;
    %in = $query->Vars;
    &glynx_menu(%in);

    1;



=head2 glynx_httpd.pl - a stand-alone CGI server for running the user-interface.


=head2 glynx_slave.pl - a shortcut for running Glynx in daemon mode.

    #!/usr/bin/perl
    # Copyright (c) 2000 Flavio Glock <fglock@pucrs.br>. All rights reserved.
    # This program is free software; you can redistribute it and/or
    # modify it under the same terms as Perl itself.
    #
    # This program was based on examples in the Perl distribution,
    # mainly from Gisle Aas.
    # 
    # If you use it/like it, send a postcard to the author. 
    # Find the latest version in CPAN or http://www.pucrs.br/flavio

    $base_dir = "d:/download_glynx/";
    exec "../glynx.pl --slave --base-dir=\"$base_dir\"";
    1;


=head2 glynx_menu.pl - a small html-based user-interface.

    #!/usr/bin/perl
    # Copyright (c) 2000 Flavio Glock <fglock@pucrs.br>. All rights reserved.
    # This program is free software; you can redistribute it and/or
    # modify it under the same terms as Perl itself.
    #
    # This program was based on examples in the Perl distribution,
    # mainly from Gisle Aas.
    # 
    # If you use it/like it, send a postcard to the author. 
    # Find the latest version in CPAN or http://www.pucrs.br/flavio

    # THESE ARE SUBROUTINES - THIS FILE IS NOT INTENDED TO BE EXECUTED

    sub glynx_configure {
        $base_dir = "d:/download_glynx/";
        $log_dir =  "d:/download_glynx/";
    }

    sub glynx_menu {
        my %in = @_;
        print <<EOT;
    <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
    <HTML><HEAD><TITLE>Glynx - Download Manager</TITLE>
    </HEAD><BODY><H1>Glynx - Download Manager</H1>
EOT

        $_ = $in{url};
        tr/\\/\//;
        ($in{url}, $resto) = /^(.*?)(\/?)$/;
        $_ = $in{url};
        if ((! /http:/) and (! /ftp:/)) {
            $_ = "http://" . $_;
            s/\/\/\//\/\//;
        }
        $in{url} = $_;

          print <<ENDOFTEXT;
    $addr
     <P><FORM method="post">
     URL: <input name="url" value="$in{url}" size="80"><br>

     Depth: <input name=depth value=$in{depth}><br>

     Prefix: <input name="base" value="$in{base}" size="60"><br>

     Label: <input name=label value=$in{label}><br>

     Other options: <input name=options value=$in{options}><br>

     <input type="submit">
     </FORM>

    <pre>Obs: 
      URL = http://site/directory/file

      Depth 0 = 1 file
      Depth 1 = 1 file + links & images
      Depth 2 = links and their images

      Prefix = site/directory, limits unnecessary downloads (optional)

      Label = job name

      Options = (not ready yet)
    </pre>
ENDOFTEXT


        if (($in{url} ne "") and ($in{url} ne "http\:\/\/")) { 

            print "<hr>";

            $in{depth} = $in{depth} + 0;
            $in{depth} = 5 if ($in{depth} > 5);

            $in{label} =~ s/[\s\+]//;
            $in{label} = "default" unless $in{label};

            $in{base} = $in{url} unless $in{base};

            open (FILE, ">>${log_dir}glynx.log");
            print FILE <<EOT;
    ip:    $addr
    URL:   $in{url}
    Depth: $in{depth}
    Base:  $in{base}
    Label: $in{label}

EOT
            close (FILE);

            $cmd = "$base_dir$in{label}.grx";
            open (FILE, ">$cmd");
            print FILE <<EOT;
    //DUMP: '$in{label}'
    //PREFIX: '$in{base}'
    //
    URL: $in{url}
    //Referer: .
    //Depth: $in{depth}
EOT
    close (FILE);

          }
        print end_html;
    }

    1;



=cut
#######    END: CPAN glued parts



package myCGI;

use CGI qw/:standard unescape/;
# Modernized from "use vars '@ISA'; @ISA=..." -- same behavior, current idiom.
our @ISA = qw(HTTP::Daemon::ClientConn);

# Handle one request arriving on the embedded HTTP::Daemon server and feed
# it to the HTML menu.
#
# Arguments:
#   $self -- the client connection (an HTTP::Daemon::ClientConn); while the
#            menu runs, it is select()ed so all prints go to the socket.
#   $r    -- the HTTP::Request object for this connection.
#
# The query string (everything after "?" in the URL) is joined with the
# raw header text and split on "&" into the %in parameter hash, with
# values URL-unescaped.  main::glynx_menu does the actual rendering.
sub cgi
{
    my $self = shift;
    my $r    = shift;
    my %in   = ();
    my ($method, $url, $header, $content, $headers) =
       ($r->method, $r->url, $r->header, $r->content, $r->headers_as_string);
    # Redirect default print output to the client socket.
    select $self;
    my ($query) = $url =~ /\?(.*)/;
    $query = join("\&", $query, $header);
    foreach (split("\&", $query)) {
        $in{$1} = unescape($2) if /(.*)=(.*)/;
    }
    &main::glynx_menu($query, %in);
    # Restore normal output.
    select STDOUT;
}




package main;

# Program identity: version string and the path the script was started from.
our $VERSION = "1.031.003";
our $progname = $0;
# Directory portion of $0 (handles "/" and "\" separators); stays undef
# when the program was started without a path component.
our ($prog_dir) = $progname =~ /(.*)[\/\\]/;



    # Copyright (c) 2000 Flavio Glock <fglock@pucrs.br>. All rights reserved.
    # This program is free software; you can redistribute it and/or
    # modify it under the same terms as Perl itself.
    #
    # This is a subroutine -- not to be executed directly

    use strict;

    # Number of generic "Option"/"value" input pairs shown by the HTML menu
    # (adjusted at runtime by the More/Less buttons in glynx_menu).
    my $MAX_OPTION = 3;

    # $addr will store client-IP -- we don't have that yet
    my $addr;

    # Parallel arrays built by preprocess_menu_options from &list_options:
    # @cmd = option names, @opt = default values, @val = help text.
    # @var isn't used 
    my (@cmd, @opt, @val, @var);

    # Parse the text returned by &list_options (one "--name=value  help"
    # line per option) into the parallel arrays @cmd/@opt/@val used by the
    # help table.  Lines that do not start with "--" are treated as
    # continuations and appended to the previous entry's help text with a
    # "<br>" separator.
    sub preprocess_menu_options {
        my(@a, $i, $tmp);

        @a = split("\n", &list_options);
        $i = 0;
        @cmd = ();
        foreach(@a) {
            # Form 1: "--name=default  help text"
            ($cmd[$i], $opt[$i], $val[$i]) = /^\s*--([\w\-]*)=([\w\-\:\.\/]*)\s*(.*)$/;
            # Form 2: "--name  help text" (no default value)
            ($cmd[$i], $val[$i]) = /^\s*--([\w\-]*)\s*(.*)$/ unless $opt[$i];
            unless ($cmd[$i]) {
                # Continuation line: step back and glue onto the previous entry.
                # NOTE(review): if the very first line has no "--", $i becomes
                # -1 and $val[-1] aliases the LAST element -- confirm intended.
                $i--;
                ($tmp) = /^\s\s*(.*)$/;
                $val[$i] .= "<br>$tmp" if $tmp;
            }
            else {
                # Derive the uppercased variable name, e.g. "--base-dir"
                # becomes "$BASE_DIR" (stored in the unused @var table).
                $var[$i] = '';
                $var[$i] = '$' . $cmd[$i] if $cmd[$i];
                $var[$i] =~ tr/a-z\-/A-Z\_/;
            }
            $i++;
        }
    }

    # Emit the common page footer -- close the form, print the version
    # banner -- then finish the HTML document via CGI::end_html.
    sub glynx_footer {
        my $banner = "    </FORM><hr>\n"
            . "    <font size=-1>Glynx $main::VERSION - Copyright (c) 2000 Flavio Glock. All rights reserved. This program is free software</font>\n";
        print $banner;
        print end_html;
    }

    # Trigger a restart of the Glynx process and tell the user to wait.
    # The CGI parameter hash is accepted but not consulted here.
    sub glynx_menu_restart {
        my %in = @_;
        &main::make_restart;    # NB: &-call without parens re-passes current @_
        print "<h2>Restarting Glynx</h2>",
              "Please wait about one minute before pressing BACK<br><br>";
    }

    # Render the option table built by preprocess_menu_options as HTML.
    # Entries with empty help text act as section breaks: the current
    # table is closed and a fresh one is started.
    sub glynx_menu_help {
        my %in = @_;
        print "<table border=1>";
        for my $idx (0 .. $#cmd) {
            if ($val[$idx]) {
                # $val[$idx] =~ s/\\/\\\\/g;    # escape for printing
                print "<tr><td>$cmd[$idx]</td><td>$val[$idx]</td></tr>";
            }
            else {
                print "</table>";
                print "<table border=1>";
            }
        }
        print "</table>";

    }

    # Print the main download form: URL / depth / prefix / job-name fields
    # plus $MAX_OPTION generic "Option"/"value" pairs whose <select> lists
    # every configurable variable name, followed by the submit button.
    # Takes the CGI parameter hash; current values are echoed into the form.
    sub glynx_print_menu {
        # BUG FIX: originally "my (%in, $query) = @_;" -- the hash slurps
        # the whole argument list, so $query was always undef (and unused).
        my (%in) = @_;
        my ($option);
          print <<EOT;
     URL: <input name="url" value="$in{url}" size="80"><br>
     Depth: <input name=depth value=$in{depth}><br>
     Prefix: <input name="prefix" value="$in{prefix}" size="60"><br>
     Job name: <input name=dump value=$in{dump}><br>
EOT

        # One <select>/<input> pair per generic option slot; the select
        # offers every known scalar and array configuration variable.
        foreach (1 .. $MAX_OPTION) {
            print " Option: ";
            $option = $_; # "option$option";
            print "<select name=option", $option,">";
            print "<option>", $in{"option$option"}, "<option>";
            print join('<option>', (@{main::Config_Vars}, @{main::Config_Arrays}));
            print "</select>\n";
            print " <input name=val", $option," value=", $in{"val$option"}, "><br>\n";
        }
          print "<input type=submit name=do value=Download>";
    }

    # Main entry point of the web user interface.  Renders the page header,
    # normalizes the submitted URL, dispatches on the pressed button
    # ($in{do}: Help / Restart / More / Less / Back / Download) and, for
    # Download, appends to the log and writes a ".grx" job file that a
    # slave process will pick up.  Uses goto between labelled sections to
    # share the menu and footer code paths.
    sub glynx_menu {
        my ($query, %in) = @_;
        print <<EOT;
    <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
    <HTML><HEAD><TITLE>Glynx - Download Manager</TITLE>
    </HEAD><BODY><H1>Glynx - Download Manager</H1>
    <FORM method="post">
EOT

        # print "  [ query $query ]  ";

        # Normalize the URL: backslashes to slashes, ensure a scheme.
        $_ = $in{url};
        tr/\\/\//;
        # ($in{url}, $resto) = /^(.*?)(\/?)$/;
        # $_ = $in{url};
        if ((! /http.?:/) and (! /ftp:/)) {
            $_ = "http://" . $_;
            s/\/\/\//\/\//;
        }
        $in{url} = $_;

        # Dispatch on the submit button that was pressed.
        if ($in{do} eq 'Help') {
            &glynx_menu_help(%in);
              print "<input type=submit name=do value=Back>";
            goto footer;
        }
        elsif ($in{do} eq 'Restart') {
            &glynx_menu_restart(%in);
              print "<input type=submit name=do value=Back>";;
              print "<input type=submit name=do value=Help>";
            goto footer;
        }
        elsif ($in{do} eq 'More') {
            # Grow/shrink the number of generic option slots, then redraw.
            $MAX_OPTION += 2;
            goto MENU;
        }
        elsif ($in{do} eq 'Less') {
            $MAX_OPTION -= 2 if $MAX_OPTION > 2;
            goto MENU;
        }
        else {        # eq 'Back'
    MENU:
            &glynx_print_menu(%in);
              print "<input type=submit name=do value=Help><br>";
              print "<input type=submit name=do value=More> / ";
              print "<input type=submit name=do value=Less> options<br>";
              print "<input type=submit name=do value=Restart> Glynx<br>";
        }

        # Download: validate inputs, log the request, write the job file.
        if (($in{do} eq "Download") and ($in{url} ne "") and ($in{url} !~ /^http\:\/*$/)) { 

            print "<h2>Processing</h2>";

            # Clamp depth to a small integer; sanitize the job name.
            $in{depth} = $in{depth} + 0;
            $in{depth} = 5 if ($in{depth} > 5);

            $in{dump} =~ s/[\s\+]//;
            $in{dump} = "default" unless $in{dump};

            $in{prefix} = &make_prefix_from_url('', $in{url}) unless $in{prefix};

            print "URL: <b>$in{url}</b><br>";
            print "Depth: <b>$in{depth}</b><br>";
            print "Prefix: <b>$in{prefix}</b><br>";
            print "Job name: <b>$in{dump}</b><br>";
            print "Base-dir: <b>$main::BASE_DIR</b><br>";

            # Append the request to the shared log file.
            open (FILE, ">>${main::BASE_DIR}glynx.log");
            print FILE "ip:    $addr\n";
            print FILE "URL:   $in{url}\n";
            print FILE "Depth: $in{depth}\n";
            print FILE "Prefix: $in{prefix}\n";
            print FILE "Dump: $in{dump}\n";
            foreach (1 .. $MAX_OPTION) {
                print FILE "", $in{"option$_"}, ": ", $in{"val$_"}, "\n" if $in{"option$_"} and $in{"val$_"};
            }
            print FILE "\n";

            close (FILE);

            # Write the ".grx" job file that slave processes poll for.
            my $cmd = "$main::BASE_DIR$in{dump}.grx";
            open (FILE, ">$cmd");
            foreach (1 .. $MAX_OPTION) {
                print FILE "//", $in{"option$_"}, ": ", $in{"val$_"}, "\n" if $in{"option$_"} and $in{"val$_"};
            }
            print FILE "//DEPTH: $in{depth}\n";
            print FILE "//DUMP: $in{dump}\n";
            print FILE "//PREFIX: $in{prefix}\n";
            print FILE "//\n";
            print FILE "URL: $in{url}\n";
            print FILE "//Referer: .\n";
            print FILE "//Depth: $in{depth}\n";

            close (FILE);

          }
    footer:
        glynx_footer; 
    }

    1;


# Names of the scalar and array configuration variables that may appear
# as options in ".grx" job files and in the web menu.
our @Config_Vars = qw/DIR_DEPTH_MAX NAME_LEN_MAX COOKIES AUTH DEPTH TIMEOUT AGENT REFERER INDEXFILE SLEEP OUT_DEPTH BASE_DIR PART_SUFFIX MAX_DOCS INVALID_CHAR LOOP SUBST DUMP DUMP_MAX RETRY_MAX MAKEREL/;
our @Config_Arrays = qw/PREFIX EXCLUDE/; 

# Return 1 when $opt is a recognized configuration name (scalar or
# array variable), 0 otherwise.  Replaces the original hand-rolled
# membership loop with the idiomatic grep.
sub valid_option {
    my ($opt) = @_;
    return( (grep { $_ eq $opt } @Config_Vars, @Config_Arrays) ? 1 : 0 );
}

# Defaults
my $AUTH =        '';
my $MAKEREL =    0;
my $MIRROR =    0;
my $MEDIAEXT =    0;
my $DEPTH =    0;
my $TIMEOUT =    30;
my $AGENT =    "Mozilla/3.0 (WinNT; I)";
my $REFERER =    ".";
my $SLEEP =    1;
my $OUT_DEPTH =    0;    # maximum number of levels when leaving the site (0 = never leave)
our $BASE_DIR =    ".";
my $PART_SUFFIX =    "._PART_";
my $MAX_DOCS =    10000;
my $INVALID_CHAR = '$';
my $COOKIES =    '';
my $NAME_LEN_MAX =  30;
my $DIR_DEPTH_MAX = 8;
my $SERVER_PORT = 8081;
my $CHILDREN = 0;
my $INDEXFILE =        "_INDEX_.HTM";


my @PREFIX =    ();
my @EXCLUDE =    ();        # "/\\/tn_|\\?[DSMN]=[AD]|banner|\\.gif/i";
my $LOOP =    "";        # "~~~~:1..50";
my $SUBST =    "";        # "s/show\\.asp\\?//";

my $DUMP =        "";
my $DUMP_MAX =    30;
my $RETRY_MAX =    5;
my $RETRY_404 =    1;
my $SLAVE =    0;
my $STOP =    0;
my $RESTART =    0;

# to-be configurable
my $RETRY_TIMEOUT_MULTIPLIER = 2;
my $MAKE_BACKUP = 0;
my $MAX_TIMEOUT = 3600;
my $DEFAULT_PART_SIZE = 4096 * 4;

# Internal-use defaults, not configurable
my $MAX_TESTE_REPETICAO =    30;    # check the most recent links before adding to the list
my $LIST_SIZE =    3;        # record size of the @links structure = ($url, $referer, $nivel)
my $CFG_SAVE =     0;

# Suffixes used to mark the state of job and data files on disk.
my $DUMP_SUFFIX =        ".grx";
my $NOT_FOUND_SUFFIX =    "._NOT_";
my $BUSY_SUFFIX =        "._BUSY_";
my $DONE_SUFFIX =        "._DONE_";
my $GLYNX_SUFFIX =        ".glynx";
my $BACKUP_SUFFIX =        ".bak";
my $ERROR_SUFFIX =        "._ERR_";

my $GET_SEPARATOR =    "\?";    # needs escaping
my $POST_SEPARATOR =     "_X_POST_X_";

my $CFG_FILE =        "glynx.ini";

# - at startup, read file-time of $SLAVE_RESTART_FILE.
# - do a restart whenever $SLAVE_RESTART_FILE file-time changes.
# - exit whenever $SLAVE_STOP_FILE exists.

my $SLAVE_STOP_FILE =    "_STOP_$GLYNX_SUFFIX";
my $SLAVE_RESTART_FILE =    "_RESTART_$GLYNX_SUFFIX";

# keep out of this list: htm html js cgi txt cfm shtml
my @DEFAULT_EXCLUDE = qw/wav mov png swf css dcr doc rtf bak ra rm sfw pcx log ps bmp dvi pdf jar java class rar iso bin midi mid mod mpeg mpg mp3 avi jpg jpeg gif gz msi asf zip cdf exe tar/;
my $default_exclude = "/\\." . join("\$|\\.", @DEFAULT_EXCLUDE) . "\$/i";
# print "default_exclude: $default_exclude\n";

# data structures
my @dump;       # pending work to be written to a dump (.grx) file
my @links;      # queue of links to visit ($url, $referer, $nivel triples)
my @retry;      # failed downloads to try again
my @processed;  # flat list of links already visited

# general control

my $New_Program_Date;
my $New_Restart;
my $Last_Program_Date;
my $Last_Restart;

my $Dump_index;
my $dump_filenum;
my $dump_filename;
my $dump_nivel_zero;
my $dump_mode_error = 0;

my $num_docs;
my $retry;
our @loop;
my $http_daemon;
# my $ua;
my $nlinks;
my $just_slave;
my $Slave_file;
my $prefix;
my $new_random_filename;
my $dir_index;
my $dir_expr;
my $dir_busy;
my $dir;

my $VERBOSE;
my $QUIET;
my %Main_Config;

my $Boundary;
my $num_callback;
my $filename;
# my $real_name;

# Flags checked by the kill/signal handling (all currently disabled).
my $KILL_FINISH =                0;    # 1;
my $KILL_RESTART =            0;    # 2;
my $KILL_RESTART_PROGRAM_MODIFIED =    0;    # 3;
my $KILL_STOP =                0;    # 4;

my $IS_CHILD;
my $children;
my $SERVER;


# Keep a copy of the original command line; strip $progname to its bare name.
my @myARGV = @ARGV;
$progname =~ s|.*/||;        # only basename left
$progname =~ s/\.\w*$//;    # strip extension if any

# Startup sequence: read the config file, then command-line options
# (options win), then normalize the results and snapshot them so slave
# mode can restore them between jobs.
# $VERBOSE=1;
print "  [ READ-CFG: $CFG_FILE ]\n" if $VERBOSE;
&read_dump ($CFG_FILE);
print "  [ READ-CFG: get-options ]\n" if $VERBOSE;
&get_options;
print "  [ AUTH = $AUTH ]\n" if $VERBOSE;
print "  [ READ-CFG: preprocess-options ]\n" if $VERBOSE;
&preprocess_options;

&save_Config (\%Main_Config);
&show_Config (\%Main_Config) if $VERBOSE;


# Parse command-line options with Getopt::Long into the configuration
# variables declared above.  Boolean options with "!" accept a "--no"
# prefix (e.g. --no404-retry); "=s"/"=i" require a value, ":s" makes the
# value optional.  Calls usage() on any parse error.
sub get_options {
    print "  [ GET_OPTIONS ]\n" if $VERBOSE;
    GetOptions(
    'depth=i'    => \$DEPTH,
    'timeout=i'    => \$TIMEOUT,
    'agent:s'    => \$AGENT,
    'referer:s'    => \$REFERER,
    'indexfile=s'    => \$INDEXFILE,
    'sleep=i'    => \$SLEEP,
    'out-depth=i'    => \$OUT_DEPTH,
    'base-dir=s'    => \$BASE_DIR,
    'part-suffix=s'    => \$PART_SUFFIX,
    'limit=i'    => \$MAX_DOCS,
    'invalid-char=s'    => \$INVALID_CHAR,
    'prefix=s'    => \@PREFIX,
    'exclude:s'    => \@EXCLUDE,
    'loop:s'    => \$LOOP,
    'subst:s'    => \$SUBST,
    'dump=s'    => \$DUMP,
    'auth=s'    => \$AUTH,
    'dump-max=i'    => \$DUMP_MAX,
    'retry=i'    => \$RETRY_MAX,
    '404-retry!'    => \$RETRY_404,        # --no404-retry
    'slave!'    => \$SLAVE,
    'verbose!'    => \$VERBOSE,
    'quiet!'    => \$QUIET,
    'restart!'    => \$RESTART,
    'stop!'        => \$STOP,
    'mirror!'    => \$MIRROR,
    'mediaext!'    => \$MEDIAEXT,
    'makerel!'    => \$MAKEREL,
    'cookies=s'    => \$COOKIES,
    'name-len-max=i'    => \$NAME_LEN_MAX,
    'dir-depth-max=i'    => \$DIR_DEPTH_MAX,
    'port=i'    => \$SERVER_PORT,
    'make-cpan!'    => \&make_CPAN,
    'server!'    => \$SERVER,
    'children=i'    => \$CHILDREN,

    # subroutines
    'version'    => \&print_version,
    'help'    => \&usage,
    'cfg-save!'    => \$CFG_SAVE, 

    # not implemented, but exist in lwp-rget:
    # NOTE(review): "\&not_implemented('hier')" CALLS not_implemented at
    # setup time and passes a ref to its return value -- Getopt::Long
    # handlers should be closures, e.g. sub { not_implemented('hier') }.
    # Left unchanged because not_implemented's definition is not visible
    # here; confirm its return value before fixing.
    'hier'    => \&not_implemented('hier'),
    'iis'    => \&not_implemented('iis'),
    'tolower'    => \&not_implemented('tolower'),
    'nospace'    => \&not_implemented('nospace'),
    'keepext=s'    => \&not_implemented('keepext'),

    ) || usage();

}

# Normalize global settings after option parsing: canonicalize $BASE_DIR
# (forward slashes, created if missing, absolute, trailing "/"), split
# $LOOP on ":" into @loop, and clamp $TIMEOUT to $MAX_TIMEOUT.
sub preprocess_options {
    $BASE_DIR = "." unless $BASE_DIR;
    $BASE_DIR =~ tr{\\}{/};
    unless (-d $BASE_DIR) {
        print "  [ BASE-DIR: creating $BASE_DIR ]\n" if $VERBOSE;
        &glynx_file::make_dir($BASE_DIR);
    }
    $BASE_DIR = abs_path("$BASE_DIR");
    $BASE_DIR .= "/" unless $BASE_DIR =~ m{/$};
    print "  [ BASE_DIR: $BASE_DIR ]\n" if $VERBOSE;

    @loop = split(":", $LOOP);
    print "  [ LOOP: $LOOP ]\n" if $VERBOSE;

    $TIMEOUT = $MAX_TIMEOUT if $TIMEOUT > $MAX_TIMEOUT;
    print "  [ TIMEOUT: $TIMEOUT ]\n" if $VERBOSE;
}

# Snapshot every configurable variable named in @Config_Vars and
# @Config_Arrays into %$hashref, so slave mode can restore the original
# command-line configuration after processing each job file.
# NOTE(review): the symbolic refs (${$_}, @{$_}) resolve to package
# variables, not to same-named file lexicals -- confirm this is intended.
sub save_Config {
    my ($hashref) = @_;
    print "  [ SAVE-CONFIG ]\n" if $VERBOSE;
    no strict "refs";
    $$hashref{$_} = ${$_}     for @Config_Vars;
    $$hashref{$_} = [ @{$_} ] for @Config_Arrays;
    use strict "refs";
}

# Inverse of save_Config: copy saved values out of %$hashref back into
# the package variables named in @Config_Vars / @Config_Arrays.
sub retrieve_Config {
    my ($hashref) = @_;
    print "  [ RETRIEVE-CONFIG ]\n" if $VERBOSE;
    no strict "refs";
    ${$_} = $$hashref{$_}        for @Config_Vars;
    @{$_} = @{ $$hashref{$_} }   for @Config_Arrays;
    use strict "refs";
}

# Print the saved configuration hash, one "[ NAME: value ]" line per
# variable (arrays comma-joined).  Produces no output unless $VERBOSE.
sub show_Config {
    my ($hashref) = @_;
    return unless $VERBOSE;    # every print below was guarded by $VERBOSE
    print "  [ SHOW-CONFIG ]\n";
    print "  [ $_: ", $$hashref{$_} , " ]\n"                  for @Config_Vars;
    print "  [ $_: ", join(',', @{$$hashref{$_}} ) , " ]\n"   for @Config_Arrays;
}


# --cfg-save: write the default configuration file and exit.
if ($CFG_SAVE) {
    &cfg_save_default;
    exit 0;
}

my ($url, $referer, $nivel);
$url = shift;    # optional url or input file
# With no URL on the command line, run as an HTTP server.
$SERVER = 1 unless $url;

# --children=N with --server/--slave: fork N quiet slave children; the
# parent continues as the server.
$IS_CHILD = 0;
if ($CHILDREN and ($SERVER or $SLAVE)) {
    # PARENTAL CONTROL
    my $pid;
    $children = $CHILDREN;
SPAWN:
    undef $pid;
    if (!defined($pid = fork)) {
        print "  [ cannot fork: $! ]\n" unless $QUIET;
        # exit 0;
    } 
    elsif ($pid) {
        print "  [ FORKING: $children ]\n" if $VERBOSE;
        $children--;
        sleep(20);    # go easy on CPU
        goto SPAWN unless $children <= 0;
        # print "begat $pid";
        # print "I'm the parent";
    }
    else {
        # print "I'm the child";
        # be a very quiet slave:
        $SLAVE = 1; $SERVER = 0; $QUIET = 1; $VERBOSE = 0;
        $IS_CHILD = 1;
    }
} # end: spawn children



print "  [ $progname.pl Version $VERSION ]\n" if $VERBOSE;
print "  [ URL = $url ]\n" if $VERBOSE;

# A dump-file argument is taken as-is; anything else is expanded into a
# full URL guess (URI::Heuristic).
unless ($url =~ /$DUMP_SUFFIX$/) {
    $url = uf_uristr($url);
}

print "  [ URL = $url ]\n" if $VERBOSE;
print "  [ LOOP = " , join(" ", @loop), " ]\n" if $VERBOSE;

    # Remember the restart-file and program timestamps so slave mode can
    # detect an externally requested restart or a modified program.
    # print "  [ STOP ]\n"    if $STOP;
    $Last_Restart =    -M "$BASE_DIR/$SLAVE_RESTART_FILE";
    # print "  [ LAST-RESTART: $Last_Restart ]\n" if $VERBOSE;
    $Last_Program_Date = -M $0;
    # print "  [ LAST-PROGRAM-DATE: $0 = $Last_Program_Date ]\n" if $VERBOSE;

# Server mode: run the HTTP daemon; if it cannot bind, fall back to
# slave mode (http_server only returns on failure).
if ($SERVER and ! $SLAVE) {
    &http_server; 
    print "  [ Can't start http daemon at port $SERVER_PORT ]\n" unless $QUIET;
    print "  [ Starting slave mode ]\n" unless $QUIET;
    $SLAVE = 1; $SERVER = 0;
}
usage() if @ARGV;

&make_restart    if $RESTART;
&make_stop    if $STOP;
&my_main;

# Main work loop.  Seeds the link queue from either a dump (.grx) file or
# a command-line URL, downloads everything in @links (dumping pending
# work periodically), retries failures with an exponentially growing
# timeout, and finally -- in slave mode -- polls $BASE_DIR for new ".grx"
# job files forever, looping back via goto SLAVE_LOOP for each one.
sub my_main {
    my $u1;

    # layout of @links = ($url, $referer, $nivel, ...)
    @links = ();        # links still to be visited ($url, $referer, $nivel, ...)
    $dump_nivel_zero = 1;    # if $DUMP, save last level. Reset if $SLAVE.
    $Slave_file = "";
    @dump = ();        # accumulates the dump file (same layout as @links)

SLAVE_LOOP:

    @retry = ();        # incomplete files, to try again
    @processed = ();    # links already visited (flat list)

    $num_docs = 0;
    $dump_filenum = 0;
    $retry = $RETRY_MAX;
    # $prefix = $PREFIX[0];
    $Dump_index = 0;

    if ( ($url =~ /$DUMP_SUFFIX$/) and !($url =~ /:/) ) {
        # DUMP:
        # check whether the name matches a dump file (tried with and
        # without $BASE_DIR and the dump suffix)
        if (-e "$url")        { $dump_filename = "$url"; }
        elsif (-e "$BASE_DIR$url")    { $dump_filename = "$BASE_DIR$url"; }
        elsif (-e "$url$DUMP_SUFFIX")    { $dump_filename = "$url$DUMP_SUFFIX"; }
        elsif (-e "$BASE_DIR$url$DUMP_SUFFIX") { $dump_filename = "$BASE_DIR$url$DUMP_SUFFIX"; }
        else  { die "  [ CAN'T FIND INPUT FILE: $url ]" }
        read_dump($dump_filename);
        # read_dump($DUMP) if $DUMP;    # avoids losing information ???
    }
    elsif ($url) {
        # URL:
        # extract the site name; derive a default prefix when none given
        $REFERER = $url unless $REFERER;
        print "  [ URL: abs: $url ]\n" if $VERBOSE;
        $u1 = URI::URL->new_abs($url, $REFERER);
        #$myhost = $u1->host; 
        #print "Host: $myhost\n";

        unless ($#PREFIX >= 0) {
            print "  [ PREFIX: abs: $PREFIX[0] ]\n" if $VERBOSE;
            $prefix = &make_prefix_from_url($PREFIX[0], $u1);
            @PREFIX = ($prefix);
            print "  [ PREFIX: @PREFIX ]\n" unless $QUIET;
        }
        &insert_url ($url, $REFERER, $DEPTH);
    }
    else {
        # print "  [ NO URL ]\n" unless $QUIET;
    }

download_links_retry:

    # Drain the link queue, downloading each entry.
    while (@links) {
        if ($num_docs >= $MAX_DOCS) {
            print "  [ FIM: num_docs > $MAX_DOCS ]\n" if $VERBOSE;
            last;
        }
        #print "LINKS $#links -- $url --";
        ($url, $referer, $nivel) = shift_list(\@links);
        $nlinks = ($#links + 1) / $LIST_SIZE;
        last if $nlinks > $MAX_DOCS;
        # print " ($url, $referer, $nivel [$nlinks] \n";
        download($url, $referer, $nivel);

    # Progress report: visited / pending / deferred counts and depth.
    print "  [ STATUS: READ:", 
            $#processed + 1, "/",
            +(($#links + 1) / $LIST_SIZE) + $#processed + 1, 
        " LATER:", 
            +($Dump_index) / $LIST_SIZE, "/", 
            + ($#dump + 1) / $LIST_SIZE, 
        " DEPTH:", 
            $DEPTH - $nivel, "/", 
            $DEPTH, " ]\n" unless $QUIET;

        # time to make a partial dump?
        if (    $DUMP and 
            $DUMP_MAX and
            (($#dump - $Dump_index) > ($DUMP_MAX * $LIST_SIZE) ) ) {
            &dump;
        }
       }

    # RETRY?

    # Failed downloads: requeue them with a longer timeout, up to
    # $RETRY_MAX passes.
    if (($#retry >= 0) and ($retry > 1)) {
        print "  [ RETRY: LEVEL:", $RETRY_MAX - $retry + 2, "/$RETRY_MAX URL:", +($#retry + 1) / $LIST_SIZE, " ]\n" unless $QUIET;
        $retry--;
        @links =    @retry;
        @processed =    ();
        @retry =    ();
        # @dump =    ();
        # $Dump_index =    0;
        $RETRY_TIMEOUT_MULTIPLIER = 1  if $RETRY_TIMEOUT_MULTIPLIER < 1;
        $RETRY_TIMEOUT_MULTIPLIER = 10 if $RETRY_TIMEOUT_MULTIPLIER > 10;
        $TIMEOUT *= $RETRY_TIMEOUT_MULTIPLIER;
        $TIMEOUT = $MAX_TIMEOUT if $TIMEOUT > $MAX_TIMEOUT;
        glynx::timeout($TIMEOUT);
        print "  [ RETRY: TIMEOUT:", int($TIMEOUT), " ]\n" if $VERBOSE;
        goto download_links_retry;
    }
    else {
    if ($#retry < 0) {
        print "  [ DONE: DON'T NEED TO RETRY ]\n" if $VERBOSE;
    }
    elsif ($retry > 1) {
        print "  [ FAILED: URL:$retry ]\n" if $VERBOSE;
    } 
    else {
        print "  [ FAILED: WILL NOT RETRY ]\n" if $VERBOSE;
        $dump_mode_error = 1;
    }
    }
    # append pending retrys to dump
    print "  [ DUMP: Move ", +($#retry + 1) / $LIST_SIZE, " from Retry to Dump ]\n" if $VERBOSE;
    @dump = (@dump, @retry);

    # Flush whatever is left in @dump to dump files.
    print "  [ DUMP: [$DUMP] ", +($#dump + 1) / $LIST_SIZE, " ]\n" if $VERBOSE;
    while (($Dump_index <= $#dump) and $DUMP) {
        &dump;
    }

    $dump_mode_error = 0;


    # check for Slave mode

    $just_slave = 1;

SLAVE_IDLE:
    # Poll $BASE_DIR for ".grx" job files until one is claimed.
    while ($SLAVE) {

    if ($Slave_file ne "") {
        # done
        &glynx_file::my_rename ("$Slave_file$BUSY_SUFFIX", "$Slave_file$DONE_SUFFIX") if -e "$Slave_file$BUSY_SUFFIX";
    }
    else {
        #print "  [ SLAVE: unknown slave file $Slave_file ]\n";
    }

    # timer
    # get_options;
    # read_dump ($CFG_FILE);
    # Restore the original command-line configuration between jobs.
    &retrieve_Config (\%Main_Config);
    # &show_Config (\%Main_Config) if $VERBOSE;

    if (! $SLAVE) {
        print "  [ SLAVE: CANCELLED ]\n" unless $QUIET;
        last SLAVE_IDLE;
    }

    if ($just_slave) {
        print "  [ SLAVE: IDLE FOR $SLEEP SEC ]\n" unless $QUIET;
        &my_sleep ($SLEEP);
        $just_slave = 0;
    }
    else {
        print "  [ SLAVE: IDLE FOR $TIMEOUT SEC ]\n" unless $QUIET;
        &my_sleep ($TIMEOUT);
    }

    # what's in dir?
    $dir_expr = "$BASE_DIR";
    opendir DIR, $dir_expr or die "  [ SLAVE: CAN'T OPEN $dir_expr ]\n";
        my @dir =  readdir(DIR);
        print "  [ SLAVE: DIR: $BASE_DIR -- ", join(',',@dir), " ]\n" if $VERBOSE;
        my @busy_files = grep { (/$BUSY_SUFFIX$/) and (-f "$BASE_DIR$_") } @dir;
        @dir = grep { (/$DUMP_SUFFIX$/) and (-f "$BASE_DIR$_") } @dir;
    closedir DIR;
    print "  [ SLAVE: $dir_expr: $DUMP_SUFFIX -- ", join(',',@dir), " ]\n" if $VERBOSE;
    $dir_index = 0;

    # test one of busy_files for expiration -- make it available for slaves
    my $random_file = int(rand(1 + $#busy_files));
    my $random_filename = $BASE_DIR.$busy_files[$random_file];
    # print "  [ TEST BUSY: $BUSY_SUFFIX -- $random_filename $random_file ", join(",",@busy_files)," ]\n" if $VERBOSE;
    if ((-M $random_filename) > 1/2) {
        # Busy for over half a day: assume its worker died and requeue it.
        $new_random_filename = $random_filename;
        $new_random_filename =~ s/$BUSY_SUFFIX$//;
        &glynx_file::my_rename($random_filename, $new_random_filename);
        &glynx_file::my_touch($new_random_filename);
    }

SLAVE_TEST_DIR:

    # Claim a job file by renaming it to its "._BUSY_" form.
    while ($#dir >= $dir_index) {
        # rename file
        $dir = "$BASE_DIR$dir[$dir_index]";
        $dir_busy = "$dir$BUSY_SUFFIX";
        if (-e $dir_busy) {
            print "  [ SLAVE: $dir busy ]\n" if $VERBOSE;
            if (-e $dir) {
                # both exist -- delete one
                &glynx_file::my_unlink ($dir_busy);
            }
            if (-e $dir_busy) {
                $dir_index++;
                next SLAVE_TEST_DIR;
            }
        }
        &glynx_file::my_rename ($dir, $dir_busy);
        # check again
        unless (-e ($dir_busy)) {
                print "  [ SLAVE: can't rename $dir ]\n" unless $QUIET;
                next SLAVE_TEST_DIR;
        }
        unless (-s ($dir_busy)) {
                print "  [ SLAVE: $dir empty ]\n" unless $QUIET;
                next SLAVE_TEST_DIR;
        }
        # read dump file
        $dump_nivel_zero = 0;
        @dump = ();        # accumulates the dump file (same layout as @links)
        read_dump($dir_busy);
        $Slave_file = $dir;
        print "  [ SLAVE: processing $Slave_file ]\n" unless $QUIET;
        last SLAVE_IDLE
    } # dir ok
    } # slave

    # A job was claimed: run the whole download cycle again for it.
    if ($SLAVE) {
        print "  [ SLAVE: continue processing $Slave_file ]\n" if $VERBOSE;
        $url = "";
        $dump_nivel_zero = 0;    # download level zero, even if $DUMP
        goto SLAVE_LOOP;
    }

    print "  [ END ]\n" unless $QUIET;
} # my_main




sub make_prefix_from_url {
    # Build a download-prefix URL from a seed prefix resolved against the
    # base URL $u1.  The prefix is later used (via @PREFIX) to decide which
    # links are "inside" the download.  Returns the cleaned-up prefix, or
    # $u1 unchanged when the seed resolves to an invalid/unusable protocol.
    my ($seed_prefix, $u1) = @_;
    my ($eval_err);
    # NOTE: this first $prefix is the package global -- the lexical $prefix
    # is only declared further down, and the split_query_from_url() call
    # below still reads this global value.
    $prefix = URI::URL->new_abs($seed_prefix, $u1);
    print "  [ PREFIX: Generated: $prefix ]\n" if $VERBOSE;
    # clear fragment, query...
    # test for invalid protocol: userinfo() dies on schemes without it
    eval{$prefix->userinfo('')};
    if ($eval_err = $@) {
        print "  [ PREFIX: Error: $eval_err ]\n";
        print "  [ PREFIX: Error: Possible cause: invalid protocol ]\n" if $VERBOSE;
        return $u1;
    }

    # From here on a lexical $prefix shadows the global one; the RHS of
    # this declaration still sees the global (Perl scoping rule).
    my ($prefix, $separator, $query) = split_query_from_url($prefix);
    print "  [ PREFIX: new: $prefix ]\n" if $VERBOSE;
    $prefix = URI::URL->new($prefix);

    $prefix->fragment('');

    # removes file name (keep everything up to and including the last "/")
    unless ($prefix =~ /\/$/) {
            ($prefix) = $prefix =~ /^(.*\/)/;
            # print "  [ PREFIX: new: $prefix ]\n" if $VERBOSE;
    }
    # removes authentication ("user:pass@" and everything before the "@")
    if ($prefix =~ /\@/) {
            ($prefix) = $prefix =~ /.*\@(.*)/;
            print "  [ PREFIX: new: $prefix ]\n" if $VERBOSE;
    }
    return $prefix;
}




sub my_sleep {
    # Sleep for $time seconds, one second at a time, calling check_stop()
    # between ticks so a requested stop/restart is honored promptly.
    # Prints one progress dot per second unless running in quiet mode.
    my ($time) = @_;
    # BUGFIX: report the requested duration ($time), not the global $SLEEP --
    # callers also invoke my_sleep($TIMEOUT), which previously logged the
    # wrong number of seconds.
    print "  [ SLEEP $time " unless $QUIET;
    foreach ( 1 .. $time ) {
        &check_stop;
        sleep 1;
        print "." unless $QUIET;
    }
    &check_stop;
    print " done ]\n" unless $QUIET;
}

sub make_stop {
    # Ask a running slave to stop: remove both control files, then create
    # $SLAVE_STOP_FILE -- check_stop() exits whenever that file exists.
    # (The original comment here described the restart behavior; it was
    # swapped with make_restart's.)
    print "  [ MAKE-STOP ]\n" if $VERBOSE;
    &glynx_file::my_unlink("$BASE_DIR/$SLAVE_STOP_FILE");
    &glynx_file::my_unlink("$BASE_DIR/$SLAVE_RESTART_FILE");
    &glynx_file::my_create_empty("$BASE_DIR/$SLAVE_STOP_FILE");
}

sub make_restart {
    # Ask a running slave to restart: remove both control files, then create
    # $SLAVE_RESTART_FILE -- check_stop() re-execs the program whenever that
    # file's modification time changes.
    # (The original comment here described the stop behavior; it was swapped
    # with make_stop's.)
    print "  [ MAKE-RESTART ]\n" if $VERBOSE;
    &glynx_file::my_unlink("$BASE_DIR/$SLAVE_STOP_FILE");
    &glynx_file::my_unlink("$BASE_DIR/$SLAVE_RESTART_FILE");
    &glynx_file::my_create_empty("$BASE_DIR/$SLAVE_RESTART_FILE");
}

sub my_stop {
    # Shut the process down cleanly: drop the HTTP daemon socket first,
    # then exit.  A child exits immediately; the parent lingers so that
    # any children get a chance to finish on their own.
    undef $http_daemon;
    if ($IS_CHILD) {
        exit 0;
    }
    sleep 30;    # wait a bit for children
    exit 0;
}

sub my_exec {
    # Replace the current process with the given command line (used by
    # check_stop() to restart the program).  A child exits instead of
    # exec'ing; the parent waits briefly so children can finish first.
    my ($command_line) = @_;
    undef $http_daemon;          # release the daemon socket before exec
    exit 0 if $IS_CHILD;         # children first
    sleep 10;                    # wait a bit for children
    exec $command_line;
    die "done";                  # reached only if exec() itself failed
}

sub check_stop {
    # Poll the external control files and the program file itself; called
    # periodically (e.g. from my_sleep).  Actions:
    #   - exit (via my_stop) when $SLAVE_STOP_FILE exists;
    #   - re-exec the program with its original arguments (via my_exec)
    #     when $SLAVE_RESTART_FILE's file-time differs from the one seen
    #     at startup, or when $0 itself was modified on disk.
    my ($do_str);
    #  --stop            Stop slave
    #  --restart         Stop and restart slave
    # - at startup, read file-time of $SLAVE_RESTART_FILE.
    # - do a restart whenever $SLAVE_RESTART_FILE file-time changes.
    # - exit whenever $SLAVE_STOP_FILE exists.
    # print "  [ SLAVE: $SLAVE -- $BASE_DIR/$SLAVE_STOP_FILE ]\n" if $VERBOSE;

    # not a command-line business -- only slaves and servers obey the files
    return if ! ($SLAVE or $SERVER);

    # print "  [ SLAVE: CHECK STOP ]\n" if $VERBOSE;
    if (-e "$BASE_DIR/$SLAVE_STOP_FILE") {
        print "  [ SLAVE: STOP ]\n" if $VERBOSE;
        # exit $KILL_STOP;
        my_stop;
    }
    if (-e "$BASE_DIR/$SLAVE_RESTART_FILE") {
        # -M gives age in days; any change since $Last_Restart means a
        # restart was requested after we started.
        $New_Restart =    -M "$BASE_DIR/$SLAVE_RESTART_FILE";
        # print "  [ LAST-RESTART: $Last_Restart -- $New_Restart ]\n" if $VERBOSE;
        if ($Last_Restart != $New_Restart) {
            print "  [ SLAVE: RESTART ]\n" if $VERBOSE;
            # exit $KILL_RESTART;
            # rebuild the original command line and re-exec ourselves
            $do_str = "$0 " . join(' ', @myARGV);
            print "  [ STARTING $do_str ]\n" if $VERBOSE;
            print "  [ RESTARTING ]\n" unless $QUIET;
            &my_exec ($do_str);
            die "done";
        }
    }
    if (-e $0) {
        # program modified?  Restart so the new code takes effect.
        $New_Program_Date =    -M $0;
        # print "  [ LAST-PROGRAM-DATE: $Last_Program_Date -- $New_Program_Date ]\n" if $VERBOSE;
        if ($Last_Program_Date != $New_Program_Date) {
            print "  [ SLAVE: RESTART ]\n" if $VERBOSE;
            # exit $KILL_RESTART_PROGRAM_MODIFIED;
            $do_str = "$0 " . join(' ', @myARGV);
            print "  [ STARTING $do_str ]\n" if $VERBOSE;
            print "  [ RESTARTING ]\n" unless $QUIET;
            &my_exec ($do_str);
            die "done";
        }
    }
}

# Download List File Format:
#   [//] space [comment]
#   [//]tag: space value
#   [//]any_var_name: space value
# Tags:
#   URL: xxx    - URL
#   Referer:    - referrer URL
#   Depth:    - link levels to download from the URL
# Reserved, unimplemented tags:
#   File: xxx    -- Absolute path\filename for file (DOS style slashes)
#   Desc: xxx    -- Description
#   User: xxx    -- Username
#   Pass: xxx    -- Password (encrypted)
#   Alt: xxx    -- Alternate URL (multiple)
#
#    names are Case-Sensitive.
#    "//" is for compatibility with other download managers and may be omitted.
#    "//" is read as [!\w\s]*
#    values may have single-quotes as delimiters.
#    values may contain single-quotes and spaces.
#    single-quotes don't need to be escaped.

sub read_dump {
    # Read a download-list ("dump") file in the "Tag: value" format
    # described above.  "URL:" lines queue downloads via insert_url();
    # any other "Name:" line assigns the config scalar (or pushes onto the
    # config array) of that name via string eval.
    my ($dump_filename) = @_;
    my ($var_name, $processed_options);
    $processed_options = 0;
    # my (@tmp_prefix);
    # @tmp_prefix = @PREFIX;
    # ??? @PREFIX = ();        # will use file's prefixes

    # print "  [ DUMP: opening $dump_filename ]\n" if $VERBOSE;
    # Look for the file as given, then relative to $BASE_DIR; silently
    # return when it exists in neither place.
    if (! -e $dump_filename) { 
        $dump_filename = "$BASE_DIR$dump_filename"; 
        if (! -e $dump_filename) { return }
    }
    print "  [ DUMP: opening $dump_filename ]\n" if $VERBOSE;
    open(FILE, $dump_filename) or die "  [ DUMP: Can't open $dump_filename ]";

        # Example file contents:
        #//OUT_DEPTH: 0
        #//PREFIX: http://us.a1.yimg.com/us.yimg.com/   --> ALLOW MULTIPLE
        #URL: http://us.a1.yimg.com/us.yimg.com/i/ww/m5v2.gif
        #File: D:\download_getright\us.a1.yimg.com\us.yimg.com\i\ww\m5v2.gif
        #//Referer: http://www.yahoo.com/
        #//Depth: 2

        # $dump_nivel_zero = 0;    # disabled, since all files are level zero.
        # $OUT_DEPTH =    1 if ($OUT_DEPTH < 1) and (! $PREFIX);    # we don't know who the host is...

        # reset parameters
        $url =        "";
        #File:        -- not used ???
        my $Referer =    $REFERER;
        my $Depth =    $DEPTH;

        foreach(<FILE>) {
            chomp;
            my ($cmd, $opt) = split(" ", $_, 2);
            if ($cmd =~ /URL:/i) {
                # $prefix =    $PREFIX[0];
                unless ($processed_options) {
                    &preprocess_options;    # must do this once before processing url
                    $processed_options = 1;
                }
                # A new URL: line terminates the previous entry -- queue it.
                &insert_url ($url, $Referer, $Depth) if $url;
                # reset parameters
                $url =        $opt;
                #File:        -- not used ???
                # ?    $referer =    $Referer;
                # ? $depth =    $Depth;
                $Referer =    $REFERER;
                $Depth =    $DEPTH;
            } 
            elsif ($cmd =~ /(\w*):/) {
                $var_name = $1;

                # Normalize the value into a safely-quoted string literal
                # before handing it to string eval below.
                $opt = $1 if $opt =~ /^'(.*)'\s*$/;    # remove delimiters
                $opt =~ s/'/\\'/;    # escape other delimiters
                $opt = "'" . $opt . "'";    # put delimiters back

                if (grep { /^$var_name$/ } @Config_Arrays) {
                    eval "push @" . $var_name . ", $opt";
                    print "  [ CFG: \$$var_name = ", eval "\@" . $var_name . "[-1]", " ]\n" if $VERBOSE;
                }
                else { # if ($var_name ne "DUMP") {
                    eval "\$$var_name = $opt";
                    print "  [ CFG: \$$var_name = $opt ]\n" if $VERBOSE;
                }
            }
        }
    close(FILE);
    # last one ... flush the final pending URL entry
    &insert_url ($url, $Referer, $Depth) if $url;
    &preprocess_options;

}

sub dump {
    # Write the pending download queue (@dump, consumed via the global
    # $Dump_index in triples of url/referer/level) to a new dump file in
    # $BASE_DIR, in the "Tag: value" format read back by read_dump().
    # Entries whose target file already exists locally are skipped.
    return unless ($DUMP);
    my ($filename, $name, $dir, $dump_filename, $dump_links);
    my ($url, $referer, $nivel);

    $dump_links = 0;
    # build an absolute directory path for the output
    $dir = abs_path("$BASE_DIR");
    #print "$dir\n";

    # Find an unused dump file name: append $DUMP_SUFFIX if missing and,
    # when $DUMP_MAX is set, a numeric part ("-N") until no plain/busy/done
    # variant of the name exists.
    do {
        $dump_filenum++;
        $dump_filename = "$dir/$DUMP";
        $dump_filename .= $DUMP_SUFFIX if ! ($dump_filename =~ /$DUMP_SUFFIX$/);
        $dump_filename =~ s/(.*)\.(.*)/$1-$dump_filenum\.$2/ if $DUMP_MAX;
    } until ((! -e "$dump_filename") and 
            (! -e "$dump_filename$BUSY_SUFFIX") and 
            (! -e "$dump_filename$DONE_SUFFIX"));
    $dump_filename .= $ERROR_SUFFIX if $dump_mode_error;
    print "  [ DUMP: $dump_filename ]\n" unless $QUIET;

    if ($#dump < 0) {
        print "  [ DUMP: EMPTY ]\n" unless $QUIET;
        &glynx_file::my_unlink ($dump_filename);
        return;
    }

    # Write the current configuration first, then append the URL entries.
    cfg_save($dump_filename);
    open (FILE, ">>$dump_filename");

    while ($Dump_index <= $#dump) {
            # @dump holds flat triples: (url, referer, level), ...
            $url =     $dump[$Dump_index++];
            $referer = $dump[$Dump_index++];
            $nivel =   $dump[$Dump_index++];
            print "  [ WRITE: $url ]\n" if $VERBOSE;
            $name = &make_filename($url);
            $filename = "$dir/$name";
            if (-e $filename) {
                if (-d $filename) {
                    print "  [ ja existe diretorio: $filename ]\n" if $VERBOSE;
                    $filename .= '/' . $INDEXFILE;
                    print "  [ trying: $filename ]\n" if $VERBOSE;
                    next if (-s $filename);
                } elsif (-s $filename) {
                    print "  [ ja existe: $filename ]\n" if $VERBOSE;
                    next;
                }
            }
            # DOS-style backslashes for the File: line (see format notes)
            $filename =~ s/\//\\/g;
            print FILE <<EOT; 
URL: $url
File: $filename
//Referer: $referer
//Depth: $nivel
EOT
            $dump_links++;
            # respect the per-file entry limit, if any
            last if $DUMP_MAX and ($dump_links >= $DUMP_MAX);
    }
    close (FILE);

    print "  [ DUMP: finish ]\n" if $VERBOSE;
} # end: dump

sub cfg_save_default {
    # Persist the current configuration to the default config file.
    return cfg_save($CFG_FILE);
}

sub cfg_save {
    # Write the current configuration to $filename as "//Name: 'value'"
    # lines -- one per scalar in @Config_Vars, one per element of each
    # array in @Config_Arrays.  Values are fetched by name via string eval.
    my ($filename) = @_;
    # my ($tmp_prefix);
    my ($var_name);
    my $file = $filename;
    # Prefer an existing file; fall back to $BASE_DIR-relative, then try
    # each open target in turn before giving up.
    if (-e $filename) { }
    elsif (-e "$BASE_DIR$filename") { $file = "$BASE_DIR$filename"; }
    open(FILE, ">$file") or
        open(FILE, ">$filename") or
            open(FILE, ">$BASE_DIR$filename") or 
                die "  [ Can't write config to $file ]\n"; 

        # Write out actual prefix in use, instead of the (maybe null) config prefix. 
        # Otherwise it may happen that the links will be rejected as "out" when read.
        # @tmp_prefix = @PREFIX;
        # $PREFIX[0] = $prefix;

        print FILE <<EOT; 
// Generated by $progname.pl Version $VERSION - Copyright 2000, Flavio Glock.
//
EOT

        print "  [ AUTH = $AUTH ]\n" if $VERBOSE;

        foreach $var_name (@Config_Vars) {
            print FILE "//$var_name: \'", eval "\$$var_name", "\'\n";
        }
        foreach $var_name (@Config_Arrays) {
            # one "//Name: 'value'" line per array element
            foreach (0 .. eval "\$#$var_name") {
                #print "  [ eval: \$#$var_name -- \$", $var_name, "[$_] ]\n";
                print FILE "//$var_name: \'", eval ("\$" . $var_name . "[$_]"), "\'\n";
            }
        }
        print FILE "//\n";
    close (FILE);
    print "  [ CFG-SAVE: DONE $file ]\n" unless $QUIET;

    # restore vars
    #@PREFIX = @tmp_prefix;
}

sub split_query_from_url {
    # Split a URL into (base, separator, query).  The POST separator
    # ($POST_SEPARATOR) is tried first, then the plain GET "?"; when
    # neither matches, the URL is returned alone as a one-element list.
    my ($url) = @_;

    # print "  [ NAME: SPLIT (url) $url ]\n" if $VERBOSE;

    foreach my $sep_pattern ($POST_SEPARATOR, "\\?") {
        if ($url =~ /(.*)($sep_pattern)(.*)/) {
            print "  [ NAME: SPLIT: ($1, $2, $3) ]\n" if $VERBOSE;
            return ($1, $2, $3);    # ($url, $separator, $query)
        }
    }
    print "  [ NAME: SPLIT: NOT ]\n" if $VERBOSE;
    return ($url);
}

sub make_filename {
    # Map a URL to a local file name: peel the query part off first, then
    # decompose the rest with URI::URL and delegate the actual name
    # construction to make_filename_from_parts().
    my ($url) = @_;

    my ($base, $separator, $query) = split_query_from_url($url);

    my $uri  = URI::URL->new($base);
    my $host = $uri->host;
    my $port = $uri->port;
    my $path = $uri->path;

    # print "  [ NAME: (host) $host (path) $path $separator (query) $query ]\n" if $VERBOSE;

    return &make_filename_from_parts($host, $port, $path, $separator, $query);
}

sub check_translation_url {
    # Rebuild the URL-side path for a locally stored file: walk each path
    # component under $BASE_DIR, substituting any name recorded in the
    # name-translation file (see make_shorter_name), and return the
    # resulting URL path (without the $INDEXFILE tail).
    my ($short_filename, $short_parent) = @_;
    my ($trans_filename, @a, $tr_str, $new_name);
    my (@parent_parts, $local_parent, $my_parent, $local_url);

    # split parent and test path translations... ???
    ($local_parent) = $short_parent =~ /$BASE_DIR(.*)/;
    @parent_parts = split('/', $local_parent);
    push @parent_parts, $short_filename;
    # print "  [ check_translation_url: ", join(" ", @parent_parts), " ]\n" if $VERBOSE;

    # The first component may be "host<INVALID_CHAR>port" -- restore the
    # original "host:port" form.
    @a = split(quotemeta($INVALID_CHAR), $parent_parts[0]);
    # print "  [ check_translation_url: $INVALID_CHAR = ", join(" $INVALID_CHAR ", @a), " ]\n" if $VERBOSE;
    if ($#a == 1) {
        $parent_parts[0] = join(':', @a);
    }

    # Collapse ".." components in place: one at the start is dropped,
    # elsewhere it removes itself and the preceding component.  redo
    # restarts the scan since the list indices shift after splice.
    UP: foreach (1 .. $#parent_parts) {
        if (($_ == 1) and ($parent_parts[$_] eq '..')) {
            print "  [ SPLICE-FROM: ", join(" ", @parent_parts), " ]\n" if $VERBOSE;
            splice (@parent_parts, $_, 1);
            print "  [ SPLICE-TO:   ", join(" ", @parent_parts), " ]\n" if $VERBOSE;
            redo UP;
        }
        elsif ($parent_parts[$_] eq '..') {
            print "  [ SPLICE-FROM: ", join(" ", @parent_parts), " ]\n" if $VERBOSE;
            splice (@parent_parts, $_ - 1, 2);
            print "  [ SPLICE-TO:   ", join(" ", @parent_parts), " ]\n" if $VERBOSE;
            redo UP;
        }
    }

    # check that each parent exists, or that it has an alias.
    $local_parent = $BASE_DIR;
    $local_url = "";
    foreach (0 .. $#parent_parts) {
        my $interim_name = &glynx_file::check_translation_url_node($parent_parts[$_], $local_parent);
        unless ($interim_name) {
            $my_parent = $local_parent . $parent_parts[$_];
            if (-e $my_parent) {
                # print "  [ short: exists: $my_parent ]\n";
                $interim_name = $parent_parts[$_];
            }
            else {
                # print "  [ short: not found: $my_parent -- using it anyway ]\n";
                # To-do -- ??? (maybe it is ok)
                $interim_name = $parent_parts[$_];
            }
        }
        # print "  [ short: found $interim_name ]\n";
        # $my_parent = $local_parent . $interim_name . '/';
        $local_parent .= $parent_parts[$_] . '/';
        $local_url    .= $interim_name . '/';
    }
    # now put the filename on it
    # ...
    # Drop the synthetic index-file tail; keep the trailing slash only
    # when the local path really is a directory.
    $local_url =~    s/${INDEXFILE}\/?$//;
    $local_parent =~ s/${INDEXFILE}\/?$//;
    unless (-d "$BASE_DIR$local_parent") {
        $local_url =~    s/\/$//;
        $local_parent =~ s/\/$//;
        # print "  [ short: NOT-DIR: $local_parent => $local_url ]\n";
    }
    # print "  [ short: END $local_parent => $local_url ]\n" if $VERBOSE;
    return $local_url;
}


sub make_shorter_name {
    # Shorten $filename so it fits $NAME_LEN_MAX, keeping the extension
    # when it looks valid; over-long names get a truncated base plus a
    # random 4-digit suffix.  The new name is written back through $_[0]
    # (@_ aliases the caller's variable) and the change is logged to the
    # name-translation file so the original URL name stays resolvable.
    my ($filename, $parent, $urlname) = @_;
    my ($new_name, $trans_filename, @a, $name, $extension, $maxname, $base_name);
    my ($random_1, $random_2, $rnd);
    my ($digits);

    # do we have a name in $NAME_TRANSLATION_FILE ?
    if ($new_name = &glynx_file::check_translation_file($urlname, $parent)) {
        $_[0] = $new_name;
        return;
    }

    ($name, $extension) = split('\.',$filename,2);
    if (length($extension) > 10) {
        # invalid extension? -- arbitrary limit
        # print "  [ SHORTER-NAME: invalid extension: $extension ]\n" if $VERBOSE;
        ($name, $extension) = ($filename,'');
    }
    # BUGFIX: tr/// does not interpolate variables, so the original
    # "tr/\//${INVALID_CHAR}/" replaced "/" with a literal '$' character.
    # Use s///g (the same idiom make_filename_from_parts uses) to really
    # substitute $INVALID_CHAR for slashes in joined subdirectory names.
    $extension =~ s/\//$INVALID_CHAR/g;    # in case this is a joined subdirectory name

    $maxname = $NAME_LEN_MAX - length($extension) - 1;
    $maxname = 8 if $maxname < 8;    # -- arbitrary limit, again

    if (length($name) <= $maxname) {
        # short enough already -- just sanitize slashes
        $new_name = $name;
        $new_name =~ s/\//$INVALID_CHAR/g;    # in case this is a joined subdirectory name
        $new_name .= '.' . $extension if $extension;
    }
    else {
        print "  [ SHORTER-NAME: $name + $extension ]\n" if $VERBOSE;
        # 4 digits should be enough
        $digits = 4;                    # 1000 .. 9999
        $random_1 = '1' . ('0' x ($digits - 1));    # 1 => 1000
        $random_2 = $random_1 . '0';            # 2 => 10000
        # print " formula: int(rand($random_2 - $random_1)) + $random_1 \n";
        $maxname = $maxname - $digits + 1;
        $base_name = substr($name, 0, $maxname);
        $base_name =~ s/\//$INVALID_CHAR/g;    # in case this is a joined subdirectory name
        # note: this way of verifying unique MAY be a problem in a multi-process environment
        do {
            $rnd = int(rand($random_2 - $random_1)) + $random_1;
            $new_name = $base_name . $rnd;
            $new_name .= '.' . $extension if $extension;
            # check for duplicate names
            # TODO(review): @a is never populated, so this grep is always
            # false and the loop runs exactly once -- presumably @a was
            # meant to hold the translation-file entries.  Left as-is to
            # preserve behavior.
        } while grep { /=$new_name>/ } @a;
    }
    # log the name-change
    &glynx_file::log_translation_file($urlname, $new_name, $parent) if $urlname ne $new_name;
    $_[0] = $new_name;
}

sub make_filename_from_parts {
    # Turn URL components (host, port, path, query separator, query) into a
    # local file path relative to $BASE_DIR: sanitize characters invalid on
    # Windows, cap directory depth at $DIR_DEPTH_MAX by joining directory
    # pairs, and shorten over-long components via make_shorter_name().
    # A parallel @url_names list keeps the original URL-side names so the
    # translation file can map back.  Returns the sanitized name.
    my ($host, $port, $path, $separator, $query) = @_;
    my ($name, $urlname, $depth1, @file_names, $parent);

    # keep original names somewhere
    my ($url_name, @url_names);
    my ($url_host, $url_port, $url_path, $url_query) =
    ($host, $port, $path, $query);

    # Non-default port becomes part of the directory name ("host<IC>port").
    $name = $host;
    $name .= $INVALID_CHAR . $port if ($port != 80) and ($name);

    $url_name = $url_host;
    $url_name .= ":" . $url_port if ($port != 80) and ($url_name);

    $path =~ tr/\\/\//;            # \ => /
    $path =~ s/\/$/\/${INDEXFILE}/g;    # final slash => "/$INDEXFILE"
    $path =~ s/\/\//\//g;            # // => /
    $path =~ s/\/[^\/]*?\/\.\.\//\//g;    # /aaa/xxx/../ => /aaa/
    $query =~ s/[\\\/\:\*\?\"\<\>\|]/${INVALID_CHAR}/g;    # invalid chars

    $name .= $path;
    $name =~ s/[\:\*\?\"\<\>\|]/${INVALID_CHAR}/g;

    $url_path =~ s/\/[^\/]*?\/\.\.\//\//g;    # /aaa/xxx/../ => /aaa/
    $url_name .= $url_path;

    $name .= $INVALID_CHAR  if $separator;
    $url_name .= $separator if $separator;

    $name .= $query if $query;
    $name =~ s/\.$/\$/;            # final dot => invalid char

    $url_name .= $url_query if $query;

    # Win-NT charset:
    #    allowed:    = & _ - space
    #    not allowed:    \ / : * ? " < > |
    # Win-NT names with dots:
    #    allowed:    .* ..* ...*
    #            *.* *..* *...*
    #    not allowed:    . .. *.
    
    # print "  [ NAME: $name => (host) $host (path) $path (query) $query ]\n" if $VERBOSE;
    # print "  [ NAME: $url_name => $name ]\n" if $VERBOSE;

    @file_names = split("\/", $name);
    @url_names =  split("\/", $url_name, $#file_names + 1);
    # print "  [ NAME: name_depth: $#file_names file_name: $file_names[-1] ]\n" if $VERBOSE;

    # up to 2 times dir depth reduction, by joining pairs of dir-names.
    # Joined components keep a "/" inside, which make_shorter_name later
    # replaces; indices count from the end (-$_) so the deepest dirs merge.

    if ($#file_names > $DIR_DEPTH_MAX) {
        $depth1 = $#file_names - 1;
        foreach (3 .. $depth1) {
            # print " process: $_ -- $#file_names -- $DIR_DEPTH_MAX \n";
            if (($#file_names > $DIR_DEPTH_MAX) and ($_ <= $#file_names)) {
                splice(@file_names, -$_, 2, 
                    $file_names[-$_] . "/" . $file_names[1-$_]);
                splice(@url_names, -$_, 2, 
                    $url_names[-$_] . "/" . $url_names[1-$_]);
            }
        }
    }

    # again... (second pass gives the "up to 2 times" reduction)

    if ($#file_names > $DIR_DEPTH_MAX) {
        $depth1 = $#file_names - 1;
        foreach (3 .. $depth1) {
            # print " process: $_ -- $#file_names -- $DIR_DEPTH_MAX \n";
            if (($#file_names > $DIR_DEPTH_MAX) and ($_ <= $#file_names)) {
                splice(@file_names, -$_, 2, 
                    $file_names[-$_] . "/" . $file_names[1-$_]);
                splice(@url_names, -$_, 2, 
                    $url_names[-$_] . "/" . $url_names[1-$_]);
            }
        }
    }

    # check file/dir name length

    $parent = $BASE_DIR;
    foreach (0 .. $#file_names) {
        if ((length($file_names[$_]) > $NAME_LEN_MAX) or ($file_names[$_] =~ /\//)) {
            # print "  [ NAME: CHANGE: $url_names[$_] to $file_names[$_] at $parent ]\n";
            &make_shorter_name($file_names[$_], $parent, $url_names[$_]);
            # print "  [ NAME: NOW IS: $file_names[$_] ]\n";
        }
        $parent .= "/" unless $parent =~ /\/$/;
        $parent .= $file_names[$_];
    }

    $name = join("\/", @file_names);
    # NOTE(review): $urlname is assigned from @file_names (probably meant
    # @url_names) and is never used afterwards -- dead assignment.
    $urlname = join("\/", @file_names);
    print "  [ NAME: name_depth: $#file_names file_name: $file_names[-1] name: $name ]\n" if $VERBOSE;

    return $name;
}


sub download {
    my ($url, $referer, $nivel) = @_;
    my ($u1, $msg, $new_file_location, $content_location, 
        $rel_filename, $count, $Full_Text, $lm, $new_filename, 
        $file_depth, $base_filename, $url_filename, $original,
        $uri_filename, $prev, $cache_filename, $method, $new_file_uri,
        $form_method, $form_request, @form_query, $downloaded, $parent,
        $form_action, $item, $value, @tags, @scripts, @a, $mime_text_html,
        @form_names, @forms, $url_path, $Content_Type, $suffix, @suffix,
        $content_base, $path, $path1, $path2, $new_url, $separator, $url_no_query,
        $location, $location, $content_range, $content_size, $content_difference,
        $file_size, $data, $INET, $query, $res, $base,
        $content_end, $content_begin, $filesize, $mtime, $url_base,
        $content_length, $url_base, $meio1, $meio2, $name, $content,
    );

    $mime_text_html = 0;    # should process links?
    $downloaded = 0;    # new file or cache?
    $Content_Type = '';
    $u1 = $url;
    # cuida para ficar neste host
    # $OUT_DEPTH == 0  - nao faz download externo
    # $OUT_DEPTH == 1  - faz download mas nao segue (nivel zero)
    unless (grep { $url =~ /$_/ } @PREFIX) {
        print "  [ OUT ", join(",",@PREFIX), " DEPTH:$nivel OUT-DEPTH:$OUT_DEPTH ]\n" if $VERBOSE;
        return if $OUT_DEPTH < 1;
        $nivel = $OUT_DEPTH - 1 if $nivel >= $OUT_DEPTH;
        return if $nivel < 0;
        print "  [ OUT: DEPTH => $nivel ]\n" if $VERBOSE;
    }
    # controle do que ja foi visitado
    $meio1 = $#processed / 3;
    $meio2 = $meio1 + $meio1;
    foreach (0 .. $meio1, $meio2 .. $#processed, +($meio1 + 1) .. +($meio2 - 1)) {
        if ($processed[$_] eq $url) {
            print "  [ DID ]\n" if $VERBOSE;
            return;
        }
    }
    push @processed, $url;
    $name = &make_filename($url);
    # print "  [ REAL_NAME: $real_name -- URL: $url ]\n" if $VERBOSE;
    $filename = "$BASE_DIR$name";    # $filename is global

    if (-e "$filename$NOT_FOUND_SUFFIX") {
        print "  [ NOT-FOUND: ja existe $filename$NOT_FOUND_SUFFIX ]\n" if $VERBOSE;
        return;
    }

    $mtime = 0;
    if (-e $filename) {
        if (-d $filename) {
            print "  [ DIR EXISTS: $filename ]\n" if $VERBOSE;
            $filename .= '/' . $INDEXFILE;
            print "  [ CREATE FILE: $filename ]\n" if $VERBOSE;
            unless ($MIRROR) { 
                if (-s $filename) {
                    # URL should have ending "/"
                    ($path, $separator, $query) = split_query_from_url($url);
                    # ($path, $query) = split('\?', $url, 2);
                    $url = $path . '/' . $query if ! ($path =~ /\/$/);
                    goto DOWNLOAD_OK;
                }
            }
        } elsif (-s $filename) {
            print "  [ FILE EXISTS: $filename ]\n" if $VERBOSE;
            unless ($MIRROR) { 
                goto DOWNLOAD_OK;
            }
        }
        $mtime = (stat($filename))[9];
    }

    &glynx_file::make_dir("$BASE_DIR$name");
    # print "Download: $url\n";

    if ($DUMP and ($nivel < 1) and ($dump_nivel_zero)) {
        print "  [$nivel => DUMP]\n" if $VERBOSE;
        push_list (\@dump, $url, $referer, $nivel);
        return;
    }

    &my_sleep($SLEEP);

    # GET:

    $res = glynx::download(
        url =>              $url, 
        filename =>         $filename . $PART_SUFFIX, 
        # real_name =>       $real_name,
        agent =>            $AGENT,
        timeout =>           $TIMEOUT,
        cookie_file =>         $COOKIES ? "$BASE_DIR$COOKIES" : '',
        mtime =>            $mtime,
        verbose =>           $VERBOSE,
        quiet =>            $QUIET,
        auth =>             $AUTH,
        referer =>             $referer,
        post_separator =>    $POST_SEPARATOR,
        INDEXFILE =>        $INDEXFILE,
    );

    # DOWNLOAD FINISHED OR ABORTED

    my ($status_line) = $res->status_line;
    my ($return_code) = $status_line =~ /(\d\d\d)/;
    unless ($return_code == 200){
        print "  [ RESPONSE: $return_code ERROR <<\n", $res->as_string, "    >> RESPONSE ]\n" if $VERBOSE;
        $msg = $status_line;
        if (($msg =~ /301/) or ($msg =~ /302/)) {
            # Moved: should do what "Location:" says
            my $location = $res->header("Location");
            print "  [ OK: $msg : $location ]\n" if $VERBOSE;
            $content_base = $res->header("Content-Base");
            if ($location) {
                if ($content_base) {
                    $u1 = URI::URL->new_abs($location, $content_base);
                } 
                else {
                    $u1 = URI::URL->new_abs($location, $url);
                }
                &insert_url ($u1, $url, $nivel - 1);
            }
        }
        elsif ($msg =~ /304/) {
            print "  [ OK: 304 NOT MODIFIED ]\n" unless $QUIET;
        }
        elsif (($msg =~ /404/) and ($url =~ /(.*)${INDEXFILE}$/)) {
            # looks like we are re-processing the cache...
            # try to find out original URL
            print "  [ OOPS: Are we re-processing the cache? Trying $1 ]\n" unless $QUIET;
            push_list (\@retry, $1, $referer, $nivel);
        }
        elsif (($msg =~ /404/) and (! $RETRY_404)) {
            print "  [ ERROR $msg => CANCEL ]\n" unless $QUIET;
            if (-e "$filename$PART_SUFFIX") {
                # cria arquivo not-found
                &glynx_file::my_rename ("$filename$PART_SUFFIX", "$filename$NOT_FOUND_SUFFIX");
            }
            elsif (-e "$filename") {
                &glynx_file::my_rename ("$filename", "$filename$NOT_FOUND_SUFFIX");
            }
            elsif (-e "$filename$NOT_FOUND_SUFFIX") {
            }
            else {
                &glynx_file::my_create_empty("$filename$NOT_FOUND_SUFFIX");
            }
        } else {
            print "  [ ERROR $msg => LATER ]\n" unless $QUIET;
            push_list (\@retry, $url, $referer, $nivel);
            # print "    $retry -- push ", join(",", @retry) , " ($url, $referer, $nivel) \n";
        }
        return;
    } # end: error on download

    # DOWNLOAD FINISHED AND CORRECT

        print "  [ OK: ", $status_line, " ]\n" if $VERBOSE;
        &glynx_file::my_rename ("$filename$PART_SUFFIX", "$filename");
        &glynx_file::my_unlink ("$filename$PART_SUFFIX-1");

        $downloaded = 1;    # new file, not cache
        $num_docs++;

        print "  [ RESPONSE <<\n", $res->as_string, "    >> RESPONSE ]\n" if $VERBOSE;
        #HTTP/1.1 200 OK
        #Connection: close
        #Date: Sat, 23 Sep 2000 08:52:22 GMT
        #Server: Apache/1.3.6 (Unix)
        #Content-Type: text/html
        #Content-Type: image/jpeg
        #Content-Location: http://www.cade.com 
        #Accept-Ranges: bytes
        #Content-Length: 74623
        #Last-Modified: Mon, 17 Apr 2000 18:13:11 GMT

        $Content_Type = $res->content_type;

        &glynx_file::modify_file_attrib($filename, 'Content-Type', $Content_Type);

        # (from: UserAgent.pm)
        if (my $lm = $res->last_modified) {
            # make sure the file has the same last modification time
            utime $lm, $lm, $filename;
        }

    # REDIRECT:

        #     Location:         indica que um novo documento deve ser obtido
        #     Content-Location: indica o lugar onde este documento esta armazenado
        #     Content-Base:     indica o diretorio onde este documento esta armazenado
        #     $res->base        guess directory location

        my @urls = ($url);        # store the url variants
        print "  [ REDIRECT: URL: $url ]\n" if $VERBOSE;

        # create a root-relative url name for relocating ( /... )
        #$url_object = URI::URL->new($url);

        #$relative_url = $url_object->path . " " . 
        #        $url_object->params . " " . 
        #        $url_object->query;
        #print "  [ RELATIVE-URL: $relative_url ]\n";

        # Determine the Content-Base of the response; when the server did
        # not send one, derive it from the response base URL and record it
        # back into the response headers.
        $content_base = $res->header("Content-Base");
        unless ($content_base) {
            # try to guess base
            print "  [ GET PATH: 0 ]\n" if $VERBOSE;
            $base = $res->base;
            print "  [ GET PATH: 1 ]\n" if $VERBOSE;
            $u1 =    URI::URL->new_abs($base,$url);
            ($base, $separator, $query) = split_query_from_url($u1);
            print "  [ BASE_PATH: $base ]\n" if $VERBOSE;
            # NOTE: HTTP::Headers maps '_' to '-', so 'Content_Base' sets
            # the standard Content-Base header read back on the next line.
            $res->header( 'Content_Base' => "$base"); 
            $content_base = $res->header("Content-Base");
        }

        # check if url directory changed
        #$base_filename = &make_filename($base);

        # find out file name with query, without directory
        print "  [ GET PATH: 2 ]\n" if $VERBOSE;
        $u1 =        URI::URL->new($url);
        $path =        $u1->path;
        $path =~    s|^(.*)\/||g;   # remove directory
        #($url_base) =    $u1 =~ /(.*)$path/;
        print "  [ GET PATH: $url_base $path ]\n" if $VERBOSE;

        ($url_no_query, $separator, $query) = split_query_from_url($url);

        # rebuild the trailing part with ($path1) and without ($path2) the
        # file name; NOTE(review): only $path2 is ever used below.
        $path1 = $path;
        $path2 = '';
        $path1 .= $separator if $separator;
        $path1 .= $query if $query;
        $path2 .= $separator if $separator;
        $path2 .= $query if $query;

        #print "  [ PATH 1:  $content_base$path1 ]\n" if $VERBOSE;
        #print "  [ PATH 2:  $content_base$path2 ]\n" if $VERBOSE;

        # NOTE(review): both branches are identical, making the test dead
        # code - one branch probably was meant to use $path1; confirm the
        # intent before simplifying.
        if ($content_base eq ($url_no_query . "/")) {
            $new_url = "$content_base$path2";
        }
        else {
            $new_url = "$content_base$path2";
        }
        print "  [ NEW URL:  $new_url ]\n" if $VERBOSE and ($new_url ne $url);
        push @urls, $new_url if $new_url ne $url;

        # Follow a redirect (Location header), resolved against the
        # Content-Base when available, otherwise against the request URL.
        $location = $res->header("Location");
        if ($location) {
            if ($content_base) {
                $u1 = URI::URL->new_abs($location, $content_base);
            } 
            else {
                $u1 = URI::URL->new_abs($location, $url);
            }
            &insert_url ($u1, $url, $nivel  - 1);
        } # end: Location

        # Content-Location names this same document; record it as an
        # alternate URL rather than queueing a new download.
        $content_location = $res->header("Content-Location");
        if ($content_location) {
            if ($content_base) {
                $u1 = URI::URL->new_abs($content_location, $content_base);
            } 
            else {
                $u1 = URI::URL->new_abs($content_location, $referer);
            }
            push @urls, $u1 if ($u1 ne $url) and ($u1 ne $new_url);
        } # end: Content-Location


    # SAVE REDIRECT

        # When the document ended up with more than one URL (redirect /
        # Content-Location), move the saved file to the name derived from
        # the last - presumably canonical - URL.
        if ($#urls > 0) {
            # more than 1 filename option
            print "  [ REDIRECT: ", join(",", @urls), " ]\n" if $VERBOSE;
            # last option is probably better
            # make it the referer for our links
            $url = $urls[-1];

            $new_file_location = &make_filename($urls[-1]);
            $new_file_location = "$BASE_DIR$new_file_location";
            print "  [ FILE-LOCATION: $new_file_location ]\n" if $VERBOSE;
            &glynx_file::make_dir ($new_file_location);
            if (-e $new_file_location) {
                print "  [ FILE-LOCATION: EXISTS ]\n" if $VERBOSE;
            }
            else {
                &glynx_file::my_rename($filename, $new_file_location);
            }
            $filename = $new_file_location;
        }

    # MAKE ALTERNATE FILENAMES

        # One local filename per alternate URL; all get linked to the main
        # file further below.
        my @filenames = ($filename);
        foreach (0 .. ($#urls - 1)) {
            $new_file_location = &make_filename($urls[$_]);
            $new_file_location = "$BASE_DIR$new_file_location";
            push @filenames, $new_file_location;
            # print "  [ ALT-FILE-LOCATION: $urls[$_] => $new_file_location ]\n" if $VERBOSE;
        }

        # CHECK SUFFIX (JPG/GIF/HTM)
        # If the filename lacks an extension matching its media type, add
        # an alternate name carrying the first known suffix for the type.
        # $suffix = "";
        if ($MEDIAEXT and $Content_Type) {
            @suffix = media_suffix($Content_Type);
            print "  [ Content-Type: $Content_Type = ", @suffix, " ]\n" if $VERBOSE;
            unless (grep { $filename =~ /\.$_$/i } @suffix) {
                print "  [ WARNING: Missing Suffix: $filename ]\n" if $VERBOSE;
                $suffix = $suffix[0];
                push @filenames, $filename . "." . $suffix;
            }
        }

    # link other names to main name

        # NOTE(review): index 0 is $filename itself, so the first
        # iteration asks my_link to link the file to its own name.
        foreach (0 .. $#filenames) {
            print "  [ ALT-FILE-LOCATION: $filenames[$_] ]\n" if $VERBOSE;
            &glynx_file::my_link ($filename, $filenames[$_]);
        }

    # BEGIN CHECKING CONTENT

        # FTP directory listings get an index-file URL so links and the
        # local directory layout come out right.
        if ($Content_Type eq "text/ftp-dir-listing") {
            print "  [ FTP-DIR: Content-Type: text/ftp-dir-listing ]\n" if $VERBOSE;

            # make dir (if not done)
            $content_location = $res->header("Content-Location");
            $url_path = $url;
            if ((! $content_location) and (! ($url_path =~ /\/$/))) {
                $url_path = $url_path . '/' . $INDEXFILE;
                # $url = $url_path;
                $res->header("Content-Location", $url_path);
                print "  [ NEW URL-PATH: ", $url_path, " ]\n";
            }
            # make "href"s
        }


DOWNLOAD_OK:

    # arriving here from FILE: (cache) or from HTTP:

    # haven't we run out of depth? and we don't need to read the file?
    return if ($nivel < 1) and ! $MAKEREL;

    # is it HTML or related?
    $Content_Type = &glynx_file::get_file_attrib($filename, 'Content-Type') unless $Content_Type;
    print "  [ Content-Type: $Content_Type ]\n" if $VERBOSE;

    if ($Content_Type eq "text/html") {
        print "  [ HTML: Content-Type: text/html ]\n" if $VERBOSE;
        $mime_text_html = 1;
    } else {
        $mime_text_html = 0;
    }

    # Only parse documents that are HTML (by type or extension) and not
    # excluded.  NOTE(review): $default_exclude is interpolated into a
    # string eval - it must be a trusted, well-formed pattern.
    return if ! ($mime_text_html or ($filename =~ /\..?htm.?$/i));
    return if eval "\$filename =~ $default_exclude";

    # ok, it is HTML - let's read it back
    # NOTE(review): 2-arg open on a global bareword handle, unchecked; the
    # whole file is slurped and joined with spaces into $_ / $Full_Text.
    open (FILE, "$filename"); 
        binmode(FILE);
        @a = <FILE>; 
    close (FILE);
    chomp(@a); $_ = join(' ', @a);

    $Full_Text = $_;

    print "  [ CONTENTS <<\n$_\n    >> CONTENTS ]\n" if $VERBOSE;
    my @links1 = ();

    # identify main delimiters
    # <xxx>
    # <SCRIPT LANGUAGE="xxx"> xxx xxx xxx </SCRIPT>

    # NOTE(review): HTML is parsed with regexes over the joined text;
    # nested or malformed markup is not handled.
    @tags = /(<.*?>)/g;
    @scripts = /(<SCRIPT.*?<\/SCRIPT>)/ig;
    @forms = /(<FORM.*?<\/FORM>)/ig;

# SCRIPTS

    # <SCRIPT LANGUAGE="xxx"> xxx xxx xxx </SCRIPT>
    # <SCRIPT LANGUAGE="JavaScript" SRC="js/dynlayer.js"></SCRIPT>



# FORMS

    # Turn each <FORM> into a URL that reproduces submitting the form with
    # its default values, so the target page gets downloaded too.

    foreach(@forms) {
        # find out what to do with the form
        # keywords:
        #    ACTION="http://www.clicrbs.com.br/redirect.jsp" METHOD="get" 
        #     NAME="tab" VALUE="00001"
        #    NAME="newsID" VALUE="0"
        ($form_action) = /ACTION\s{0,100}?=\s{0,100}?\"?(.{0,500}?)[">\s]/i;
        $form_action = $url unless $form_action;
        ($form_method) = /METHOD\s{0,100}?=\s{0,100}?\"?(.{0,100}?)[">\s]/i;
        $form_method = "GET" unless $form_method;
        s/<FORM.*?>//;    # finished header
        print "  [ FORM: METHOD: $form_method ]\n" if $VERBOSE;
        print "  [ FORM: ACTION: $form_action ]\n" if $VERBOSE;
        # capture from each NAME= up to a ">" so a VALUE= (if any) stays
        # inside the captured chunk and can be extracted below
        @form_names = /NAME\s{0,100}?=\s{0,100}?\"?(.{0,500}?>)/ig;
        # print "  [ FORM: NAMES: ", join(" -- ", @form_names), " ]\n" if $VERBOSE;
        @form_query = ();
        foreach(@form_names) {
            ($item)  = /^(.*?)[">\s]/;
            # NOTE(review): the "or ''" is a no-op; when VALUE is absent
            # $value stays undef.
            ($value) = /VALUE\s{0,100}?=\s{0,100}?\"?(.{0,500}?)[">\s]/i or '';
            print "  [ FORM: NAME: $item = $value ]\n" if $VERBOSE;
            push @form_query, uri_escape($item) . "=" . uri_escape($value);
        }

        # create request
        # GET and POST use different separators so the request can be
        # replayed with the right method later.
        if ($form_method =~ /get/i) {
            $form_request = $form_action . $GET_SEPARATOR . join("\&", @form_query);
            print "  [ FORM: REQUEST: $form_request ]\n" if $VERBOSE;
            # done. Save it.
            push @links1, $form_request;
        }
        elsif ($form_method =~ /post/i) {
            $form_request = $form_action . $POST_SEPARATOR . join("\&", @form_query);
            print "  [ FORM: REQUEST: $form_request ]\n" if $VERBOSE;
            # done. Save it.
            push @links1, $form_request;
        }
        else {
            print "  [ FORM: METHOD NOT IMPLEMENTED: $form_method ]\n" if $VERBOSE;
        }
    } # end forms


# COLLECT LINKS FROM TAGS <...>

    # Scan every tag (and script block) for anything that looks like a
    # link: href/src/background attributes, javascript window.open calls
    # and quoted URLs, meta-refresh targets and applet code/archive files.
    # print join("\n", @tags);

    foreach(@tags, @scripts) {

            # do not consider comments <! > unless they are javascript
            # s/<!.*?>//;

            # <BODY BACKGROUND="..
            push @links1, /<.{0,100}?background\s{0,100}?=\s{0,100}?\"?(.{0,500}?)[">\s]/ig;
    
            # a href, area href, ref href, span href
            push @links1, /<.{0,100}?href\s{0,100}?=\s{0,100}?\"?(.{0,500}?)[">\s]/ig;
    
            # image src, frame src, script src, embed src 
            push @links1, /<.{0,100}?src\s{0,100}?=\s{0,100}?\"?(.{0,500}?)[">\s]/ig;
    
            # javascript: window.open
            # window.open('http://www5.via-rs.com.br/mapa/mapa_n.php3','...
            push @links1, /window\.open\s{0,100}?\(\s{0,100}?\'(.{0,500}?)\'/ig;

            # javascript: jump()
            #  JAVASCRIPT:jump(&quot;http://www.phy.ntnu.edu.tw/java/index.html&quot; )
            push @links1, /&quot;(http\:\/\/.{0,500}?)&quot;/ig;
            push @links1, /\"(http\:\/\/.{0,500}?)\"/ig;
            push @links1, /\'(http\:\/\/.{0,500}?)\'/ig;
            # JAVESCRIPT:jump('color/color_e.html')
            push @links1, /\"(.{0,500}?\.html)\"/ig;
            push @links1, /\'(.{0,500}?\.html)\'/ig;

            push @links1, /\"(.{0,500}?\.htm)\"/ig;
            push @links1, /\'(.{0,500}?\.htm)\'/ig;

            # java: <OPTION  VALUE="http://www.gruposinos.com.br/abc">     
            push @links1, /<option.*?value\s{0,100}?=\s{0,100}?\"?(http\:\/\/.{0,500}?)[">\s]/ig;
    
            # refresh
            push @links1, /<meta.{10,20}?refresh.{10,20}?url=(.{0,500}?)[">\s]/ig;

            # span class -- correction: this is not java, it is css
            # <span class="plntxt"> <b class="xxx">

            # applet
            @a = /<applet(.*?)>/ig;
            # <applet archive="..." code="..." ...
            # <applet codebase="..." code="..." ...
            # <applet code="rc.class" width=460  height=300> 
            my ($archive, $code, $codebase, $applet);
            foreach (@a) {
                if (/archive=\s{0,100}\"{0,1}(.{0,100}?)[">\s]/i) {
                    print "  [ APPLET: archive==$1 ]\n" if $VERBOSE;
                    $archive = $1;
                } else {
                    $archive = "";
                }
                if (/code=\s{0,100}\"{0,1}(.{0,100}?)[">\s]/i) {
                    print "  [ APPLET: code==$1 ]\n" if $VERBOSE;
                    $code =  $1;
                } else { 
                    print "  [ APPLET: code==null ]\n" if $VERBOSE;
                    $code = ""; 
                }
                if (/codebase=\s{0,100}\"{0,1}(.{0,100}?)[">\s]/i) {
                    $codebase = $1; 
                    $codebase .= '/' unless $codebase =~ /\/$/;
                } else { 
                    $codebase = ""; 
                }
                # queue the archive (bare and with codebase) plus the
                # applet class file itself
                $applet = "$codebase$code";
                push @links1, $archive if $archive;
                push @links1, "$codebase$archive" if $archive and $codebase;
                print "  [ APPLET: $_ => $codebase$code ]\n" if $VERBOSE;
                push @links1, $applet;
                push @links1, $applet . ".class" if !  ($applet =~ /\.class$/);
            } # applets
    } # end tags


# VERIFY LINKS SYNTAX

    # remove duplicates and invalid links
    @links1 = sort @links1;
    $prev = '';
    foreach (@links1) {
        # skip mailto:, file:, javascript: or "javescript:" links,
        # empty links or links containing spaces, and links repeated
        # within the page (duplicates are adjacent after the sort above)
        #print "  [ TEST: $_ == $prev ]\n";
        $_  =~ s/#.*//;   # strip the fragment before comparing
        $_  =~ s/[';\{\}\[\]]//g;     # strip javascript junk before comparing
        if ($_ ne $prev) {
            $prev = $_;
            # NOTE(review): the /^'javascript:/ test can never match -
            # the substitution above already removed every "'".
            if (    ($_) and
                (! /^mailto:/i) and 
                (! /^javascript:/i) and 
                (! /^'javascript:/i) and 
                (! /^javescript:/i) and 
                (! /a href\=/i) and 
                (! /\s/i) and 
                (! /^.:/) and    # c:\
                (! /^file:\/\//i)) {
                # valid link
                print "  [ LINK: $_ ]\n" if $VERBOSE;
            }
            else { 
                print "  [ LINK: INVALID $_ ]\n" if $VERBOSE;
                # rejected entries are undef'ed in place and skipped later
                $_ = undef;
            }
        }
        else { $_ = undef }
    } # end verify links


# PROCESS LINKS

    # build the structure @links = ($url, $referer, $nivel, ...)
    # filter links for MAKEREL
    # $url_object = URI::URL->new($url);

    $url_filename = &make_filename($url); 
    $uri_filename = "file://" . $url_filename;
    ($method) = $url =~ /(.*?\/\/)/;    # scheme prefix, e.g. "http://"
    print "  [ URI-FILENAME: $uri_filename ]\n" if $VERBOSE;

    $count = 0;    # how many links got rewritten to relative form
    foreach (@links1) {
        if ($_) {
            $prev = $_;
            $original = $_;

            unless ($downloaded) {
                # this is a cache file. It might have some links to cache files instead of URL.
                # if the link is to a cache file, it should be changed back to the original URL.
                $cache_filename = URI::URL->new_abs($prev, $uri_filename);
                if ($cache_filename =~ /file:\/\/(.*)/) {
                    $cache_filename = $1;
                    ($parent, $cache_filename) = $cache_filename =~ /^(.*)\/(.*?)$/;
                    print "  [ CACHE-FILENAME: $BASE_DIR $parent $cache_filename ]\n" if $VERBOSE;
                    # do we know the original URL?
                    $prev = $method . &check_translation_url($cache_filename, "$BASE_DIR$parent");
                    print "  [ CACHE-URL-NAME: $prev ]\n" if $VERBOSE;
                }
            }

            # queue for download (. '' forces plain-string arguments)
            &insert_url ($prev . '', $url . '', $nivel  - 1);

            if ($MAKEREL and $mime_text_html) {
                # make links "local"
                $u1 = URI::URL->new_abs($prev, $url);
                $new_filename = &make_filename($u1); 
                $new_file_uri = URI::URL->new("file://" . $new_filename);
                $rel_filename = $new_file_uri->rel($uri_filename);

                if ($rel_filename =~ /file:\/\//) {
                    # not ready yet...
                    print "  [ REL: NOT SAME HOST: ", $u1->host, " ]\n" if $VERBOSE;
                    # put enough "../../../" on it
                    ($base_filename) = $filename =~ /${BASE_DIR}(.*)/;
                    $file_depth = $base_filename =~ tr|\/|\/|;
                    print "  [ FILENAME: $base_filename -- $filename $file_depth ]\n" if $VERBOSE;
                    $rel_filename = ("../" x $file_depth) . $new_filename;
                }

                # replace the original (quoted or bare) link text in-page
                $count+= $Full_Text =~ s/([=\"\s])\Q$original\E([\"\s\>])/$1$rel_filename$2/g;
                print "  [ REL: $count: $rel_filename -- $prev ]\n" if $VERBOSE;
            }
        }
    }


# SAVE "LOCAL" FILE

    if ($count) {
        print "  [ REL: COUNT $count << $Full_Text >> REL ]\n" if $VERBOSE;
        # make backup
        &glynx_file::my_copy($filename, $filename . $BACKUP_SUFFIX) if $MAKE_BACKUP;
        # write file back to disk
        $lm = (stat($filename))[9];    # keep last modification time
        open (FILE, ">$filename"); 
            binmode(FILE);
            print FILE $Full_Text; 
        close (FILE);
        utime $lm, $lm, $filename if $lm;
    }  


# DONE DOWNLOAD

} # end: download


sub insert_url {
    # Queue a link for download: resolve it against its referer, clean up
    # common HTML/javascript artifacts, apply the PREFIX / EXCLUDE /
    # SUBST / LOOP filters, and hand the survivors to insert_url_2.
    #
    # Parameters: $url (link found), $referer (page it was found on),
    # $nivel (remaining recursion depth; negative values are dropped).
    my ($url, $referer, $nivel) = @_;
    my ($tmp, $tmp2);

    print "  [ INSERT_URL: ($url, $referer, $nivel) ]\n" if $VERBOSE;
    return if $nivel < 0;

    # make absolute URL from referer, without fragment:
    $_ = $url;
    #print "LINKS $#links ++ $_  ++";
    $_ =~ s/#.*//;   # strip the fragment
    $url = URI::URL->new_abs($_, $referer);

    # fix errors caused by javascript mixed into html (stray quotes and
    # semicolons picked up by the link scanner)
    my $str_url = $url;
    if ($str_url =~ s/[';]//g) {
        print "  [ ERR JAVASCRIPT: ", $url, " => ", $str_url, " ]\n" if $VERBOSE;
        # BUG FIX: the original called $url->new($str_url) and discarded
        # the result, so the cleaned-up URL was never actually used.
        $url = URI::URL->new($str_url);
    }

    # fix error: http://host/../file was being saved to ./host/../file => ./file
    my $path = $url->path;
    #print "  [ PATH: ", $url->path, " ]\n" if $path =~ /\.\./;
    # /../ => /
    if ($path =~ s/^\/\.\.\//\//g) {
        print "  [ ERR PATH: ", $url->path, " => ", $path, " ]\n" if $VERBOSE;
        $url->path($path);
    }

    # take care to stay on this host:
    # $OUT_DEPTH == 0  - never download external links
    # $OUT_DEPTH >= 1  - let the download routine decide
    if ( ($OUT_DEPTH < 1) and not (grep { $url =~ /$_/ } @PREFIX) ) {
        print "  [ OUT: $url ]\n" if $VERBOSE;
        return;
    }

    # preprocessor: EXCLUDE, LOOP, SUBST
    # NOTE(review): $SUBST, @EXCLUDE and $loop[1] are user-supplied code
    # run through string eval - never feed untrusted configuration here.
    $_ = $url;
    print "  [ PREPROCESSOR: URL => $url ]\n" if $VERBOSE;
    if (eval $SUBST) {
        print "  [ SUBST $SUBST => $_ ]\n" if $VERBOSE;
        $url = $_;
    }
    foreach my $exclude (@EXCLUDE) {
        if ( eval $exclude ) {
            print "  [ EXCLUDE $exclude ]\n" if $VERBOSE;
            return;
        }
    }
    # LOOP: expand one URL into a series (eg. page numbers)
    if ($loop[0] and (/$loop[0]/)) {
        $tmp = $_;
        print "  [ LOOP: BEGIN $loop[0] : $loop[1] = ", join(",", eval $loop[1]), " ]\n" if $VERBOSE;
        foreach (eval $loop[1]) {
            $tmp2 = $tmp;
            $tmp2 =~ s/$loop[0]/$_/g;
            print "  [ LOOP: $tmp2 ]\n" if $VERBOSE;
            &insert_url_2 ($tmp2, $referer, $nivel);
        }
        print "  [ LOOP: END ]\n" if $VERBOSE;
    } else {
        &insert_url_2 ($url, $referer, $nivel);
    }
} # end: insert_url

sub insert_url_2 {
    # General link/dump dispatcher: decides whether a queued link goes to
    # the in-memory download list (@links) or to the dump list (@dump).
    my ($url, $referer, $nivel) = @_;

    #$teste = eval "\$url =~ $default_exclude";
    #print " ++ teste [$teste] $url\n";
    print "  [ PUSH: $url $nivel ]\n" if $VERBOSE;

    # Once the pending list grows past DUMP_MAX records, start diverting
    # work to the dump list so other (slave) processes can help.
    if (($#links > ($DUMP_MAX * $LIST_SIZE)) and ! $dump_nivel_zero) {
        $dump_nivel_zero = 1 ;    # ask for help - already has enough to do
        print "  [ DUMP: STARTED ]\n" if $VERBOSE;
    }

    unless ($DUMP) {
        print "  [ NO DUMP ]\n" if $VERBOSE;
        return push_list (\@links, $url, $referer, $nivel);
    }

    if ($dump_nivel_zero and ((eval "\$url =~ $default_exclude") or ($nivel == 0))) {
        print "  [ => DUMP ]\n" if $VERBOSE;
        push_list (\@dump, $url, $referer, $nivel);
    }
    else {
        print "  [ NO DUMP: DEPTH $nivel ]\n" if $VERBOSE;
        push_list (\@links, $url, $referer, $nivel);
    }
}

sub push_list {
    # List accumulator - $arrayp == \@array, a flat list of
    # ($url, $referer, $nivel) records, each $LIST_SIZE elements long.
    # Skips the push when $url is already queued, scanning record starts
    # from both ends of the list toward the middle.
    my ($arrayp, $url, $referer, $nivel) = @_;
    my $front = 0;                             # start of first record
    my $back  = $#$arrayp - $LIST_SIZE + 1;    # start of last record
    while ($front <= $back) {
        # print " $$arrayp=", $$arrayp[$front], "--", $$arrayp[$front+1], " ";
        if ( ($url eq $$arrayp[$front]) or
             ($url eq $$arrayp[$back]) ) {
            print "  [ PUSH: repetido ]\n" if $VERBOSE;
            return;
        }
        $front += $LIST_SIZE;
        $back  -= $LIST_SIZE;
    }
    push @$arrayp,  ($url, $referer, $nivel);
}

sub shift_list {
    # Counterpart of push_list: removes and returns the first
    # ($url, $referer, $nivel) record from the list.
    # $arrayp == \@array
    my ($arrayp) = @_;
    my ($url, $referer, $nivel) = splice(@$arrayp, 0, 3);
    print "  [ SHIFT: $url ]\n" if $VERBOSE;

    return ($url, $referer, $nivel);
}

sub not_implemented {
    # Warn (verbosely) that a configuration option is not implemented.
    # Element [0] of the option array holds the function name, so counts
    # below 1 mean there is nothing to report.
    my ($option_count) = @_;
    return if $option_count < 1;
    print "  [ CFG: $option_count NOT IMPLEMENTED ]\n" if $VERBOSE;
}


# HTTP SERVER

sub http_server {
    # Embedded HTTP user-interface server.  Listens on $SERVER_PORT,
    # accepts GET/POST requests and hands each one to the myCGI class
    # (defined elsewhere in this file); loops forever, polling for a stop
    # request between accepts.  Returns 0 if the daemon cannot be
    # started; otherwise it never returns normally.
      my ( $c, $r);    # $http_daemon,

    &preprocess_menu_options;

      # NOTE(review): indirect object syntax ("new HTTP::Daemon") - prefer
      # HTTP::Daemon->new(...).  The 5s Timeout makes accept() return
      # periodically so the &check_stop poll below gets to run.
      $http_daemon = new HTTP::Daemon (
        LocalPort => $SERVER_PORT, 
        Timeout => 5) or
            return 0;    # "Can't start http daemon at port $SERVER_PORT";
      #$c = new myCGI or
    #    die "Can't start myCGI";
      print "Glynx - Download Manager\n\n";
    print "Glynx $VERSION - Copyright (c) 2000 Flavio Glock. All rights reserved.\nThis program is free software\n";

      print "\n\nUser interface server running at ", $http_daemon->url, "\n\n";
while (1) {
      if ($c = $http_daemon->accept) {   # $c = HTTP::Daemon::ClientConn
        # bless $c, "IO::Socket::INET";
        # print "peer: ", getpeername($c), "\n";
          $r = $c->get_request;   # $r = HTTP::Request
          if ($r) {
              if (($r->method eq 'GET') or ($r->method eq 'POST')) {
                  $c->send_basic_header();
                  $c->send_response();
                  # re-bless the connection so myCGI->cgi can write the
                  # user-interface page straight to this socket
                  bless $c, "myCGI";
                  $c->cgi($r);
              } else {
                  # NOTE(review): send_error expects a status code; the
                  # string "RC_FORBIDDEN" is passed instead of the constant
                  $c->send_error("RC_FORBIDDEN")
              }
          }
          $c = undef;  # close connection
    }
    &check_stop;
    print ".";
}
    # NOTE(review): unreachable - the while(1) loop above never exits
    $http_daemon = undef;
    exit 0;
} # http server


sub preprocess_source_file {
    # Copy $filename to the currently selected output handle, honoring the
    # ##NOINCLUDE / ##INCLUDE [file] preprocessor directives used by
    # make_CPAN.  When $level > 0 (i.e. inside an included file), plain
    # lines are indented by one extra tab stop; tabs are then expanded to
    # 4 spaces and trailing double-spaces are trimmed.  ##INCLUDE HELP
    # inlines the generated option help.  Missing include files are
    # silently skipped ("include file, if present").
    my ($filename,$level) = @_;
    my $UNIXLF = "\012";

    # three-arg open with a lexical handle; a missing file simply produces
    # no output, matching the tolerant behavior of the original
    open (my $fh, '<', $filename) or return;
    my @lines = <$fh>;
    close ($fh);

    my $print = 1;
    foreach (@lines) {
        s/[\r\n\012\015]//g;
        my ($cmd, $val) = /^##(\w*) ?(.*?)$/;
        $cmd = '' unless defined $cmd;    # non-directive line
        if ($cmd eq 'NOINCLUDE') {
            $print = 0;
        }
        elsif ($cmd eq 'INCLUDE') {
            $print = 1;
            if ($val eq "HELP") {
                # inline the option listing, normalized to unix LF with
                # blank lines collapsed
                my $b = &list_options;
                $b =~ s/[\r\n\012\015]/\000/g;
                $b =~ s/\000\000/\000/g;
                $b =~ s/\000\000/\000/g;
                $b =~ s/\000/${UNIXLF}/g; 
                print $b; 
            }
            else {
                preprocess_source_file("$prog_dir/$val", $level+1) if $val;
            }
        }
        else {
            $_ = "\t" . $_ if $level and ! /^(=|E)/;    # don't tab if EOT or pod
            s/\t/    /g;    # expand tabs to 4 spaces
            s/  $//g;    # cleanup end of line
            s/  $//g;    # cleanup end of line to help pod2html
            print $_, $UNIXLF if $print;
        }
    }
}

sub make_CPAN {
    # Build the CPAN distribution: run the source preprocessor over
    # "$progname.pl" and write the result to a version-stamped copy.
    # Exits the process when done.
    #
    # accepts these preprocessor directives:
        ##NOINCLUDE        - do not include this in distribution
        ##INCLUDE [file]    -  Turns off NOINCLUDE. Include file, if present.

    # Collecting files for CPAN distribution
    my $filename_from = "$progname.pl";
    my $filename_to = $filename_from;
    $filename_to =~ s/\.pl/-${VERSION}\.pl/;
    print "New filename: $filename_to\n";
    # FIX: checked three-arg open on a lexical handle - the original used
    # an unchecked bareword open, silently producing nothing on failure
    open (my $fh_to, '>', $filename_to) or die "Can't write $filename_to: $!";
    binmode($fh_to);
    # preprocess_source_file prints to the currently selected handle
    my $old_fh = select ($fh_to);
    preprocess_source_file($filename_from,0);
    select ($old_fh);
    close ($fh_to) or die "Can't close $filename_to: $!";
    exit 0;
}

sub print_version {
    # Report the program name and version, then terminate successfully.
    my $text = "This is $progname.pl Version $VERSION\n"
             . "\n"
             . "Copyright 2000, Flavio Glock.\n"
             . "\n"
             . "This program is free software; you can redistribute it and/or\n"
             . "modify it under the same terms as Perl itself.\n";
    print $text;
    exit 0;
}

sub usage {
    # Print the command-line usage summary followed by the full option
    # list, then terminate successfully.
    my $usage_text = <<EOT;
Usage: 
  Do-everything at once:    
    $progname.pl [options] <URL>
  Save work to finish later:
    $progname.pl [options] --dump="dump-file" <URL>
  Finish saved download:    
    $progname.pl [options] "download-list-file"
  Network mode (client/slave)
  - Clients:  
    $progname.pl [options] --dump="dump-file" <URL>
  - Slaves (will wait until there is something to do): 
    $progname.pl [options] --slave

EOT
    print $usage_text, &list_options;
    exit 0;
}


sub list_options {
    # Build and return the --help option listing as a single string, with
    # current defaults interpolated from the configuration globals.
    #
    # BUG FIX: the original accumulated the text in the global $_ without
    # localizing it, clobbering the caller's $_.  preprocess_source_file
    # calls this from inside a foreach loop, where $_ is aliased to an
    # array element, so the stomp even wrote into the caller's data.
    # Use a lexical instead; the returned text is unchanged.
    my $show_subst = $SUBST;
    $show_subst =~ s/\\/\\\\/g;    # display "\" the way the user must type it
    my $text = <<EOT;
Very basic:
  --version         Print version number ($VERSION) and quit
  --verbose         More output
  --quiet           No output
  --help            This page
  --cfg-save        Save configuration to file "$CFG_FILE"
  --base-dir=DIR    Place to load/save files (default is "$BASE_DIR")

Development only:
  --make-cpan       Preprocess files to make Glynx distribution

Download options are:
  --sleep=SECS      Sleep between gets, ie. go slowly (default is $SLEEP)
  --prefix=PREFIX   Limit URLs to those which begin with PREFIX (default is URL)
                    Multiple --prefix are allowed
  --depth=N         Maximum depth to traverse (default is $DEPTH)
  --out-depth=N     Maximum depth to traverse outside of PREFIX (default is $OUT_DEPTH)
  --referer=URI     Set initial referer header (default is "$REFERER")
  --limit=N         A limit on the number documents to get (default is $MAX_DOCS)
  --retry=N         Maximum number of retrys (default is $RETRY_MAX)
  --timeout=SECS    Timeout value - increases on retrys (default is $TIMEOUT)
  --agent=AGENT     User agent name (default is "$AGENT")
  --mirror          Checks all existing files for updates (default is --nomirror)
  --mediaext        Creates a file link, guessing the media type extension (.jpg, .gif)
                    (Windows perl makes a file copy) (default is --nomediaext)
  --makerel         Make Relative links. Links in pages will work in the
                    local computer.
  --auth=USER:PASS  Set authentication credentials
  --cookies=FILE    Set up a cookies file (default is no cookies)
  --name-len-max    Limit filename size (default is $NAME_LEN_MAX)
  --dir-depth-max   Limit directory depth (default is $DIR_DEPTH_MAX)

Multi-process control:
  --slave           Wait until a download-list file is created (be a slave)
  --server          Be an http user-interface server, with slave processes
  --children        How many slaves will this process spawn (default is "$CHILDREN")
  --port=N          Http server TCP/IP port (default is "$SERVER_PORT")
  --stop            Stop slave
  --restart         Stop and restart slave

Other:
  --indexfile=FILE  Index file in a directory (default is "$INDEXFILE")
  --part-suffix=.SUFFIX (default is "$PART_SUFFIX") (example: ".Getright" ".PART")
  --dump=FILE       (default is "$DUMP") make download-list file, 
                    to be used later
  --dump-max=N      (default is $DUMP_MAX) number of links per download-list file
  --invalid-char=C  (default is "$INVALID_CHAR")
  --exclude=/REGEXP/x (default is "@EXCLUDE") Don't download matching URLs
                    Multiple --exclude are allowed
  --loop=REGEXP:INITIAL..FINAL (default is "$LOOP") (eg: xx:a,b,c  xx:'01'..'10')
  --subst=s/REGEXP/VALUE/x (default is "$show_subst") ("\\" must be written "\\\\")
  --404-retry       will retry on error 404 Not Found (default). 
  --no404-retry     creates an empty file on error 404 Not Found.
EOT
    return $text;
}

1;

