#!/usr/bin/perl

use warnings;
use strict;

my $basedir = "docs/log-message-tags";
my $serial_file = "$basedir/next-number";
my $serial = read_serial($serial_file);
my $orig_serial = $serial;
my %tags;

foreach my $file (@ARGV) {
    if ($file !~ /\.c$/) {
        print STDERR "Skipping non-C file $file\n";
        next;
    }
    process($file);
}
write_file($serial_file, "$serial\n") if $serial != $orig_serial;

my $list = "";
foreach my $tag (sort keys %tags) {
    my $d = $tags{$tag};
    $list .= "$tag: $d->{file}:$d->{line}: $d->{msg}\n";
}
write_file("$basedir/list", $list);

exit 0;

sub process
{
    my $file = shift;

    open(my $fh, "<", $file) or die "open $file: $!";
    #print STDERR "processing $file\n";
    my $line = <$fh>;
    my $modified;
    my $result = "";
    while (defined $line) {
        if ($line =~ s{APLOGNO\(\),?}{gen_tag($file)}e) {
            $modified = 1;
        }
        if ($line =~ /APLOGNO\(\s*(\d{5})\s*\)/ ) {
            my $lineno = $.;
            my $tag = $1;
            while (1) {
                if ($line =~ s/.*?
                               APLOGNO\(\s*
                               (\d+)
                               \s*\)
                               (
                                (?: [\s\n]*
                                    (?:"
                                       (?:\\"|[^"])+    # a string constant
                                       "
                                      |
                                       \w+              # things like APR_SIZE_T_FMT
                                    )
                                )*                      # zero or more string fragments. We allow
                                                        # zero because some logging constructs may
                                                        # use things like:
                                                        #     logno=APLOGNO(...);
                                                        #     ap_log_...(..., "%s...", logno, ...)
                               )
                               [\s\n]*
                               [,);:\\]                 # the "," before the next argument,
                                                        # or the closing brace of ap_log...(),
                                                        # or the end of a statement (if used
                                                        # outside of ap_log_...),
                                                        # or ":" in things like:
                                                        #     cond ? "msg1" : "msg2",
                                                        # or "\" at the end of a macro line
                              //xs) {
                    my $match = $&;
                    note_tag($file, $lineno, $1, $2);
                    $result .= $match;
                    last;
                }
                else {
                    my $next = <$fh>;
                    defined $next or die "can't find end of format string in $file:$lineno";
                    $line .= $next;
                    if ($next =~ /^#/) {
                        # log format inside preprocessor #if, that's too complicated
                        note_tag($file, $lineno, $tag, "");
                        $result .= $line;
                        $line = "";
                        last;
                    }
                }
            };
        }
        else {
            $result .= $line;
            $line = <$fh>;
        }
    }
    close $fh;
    write_file($file, $result) if $modified;
}

sub gen_tag
{
    my $file = shift;
    my $msg = shift;
    my $tag = sprintf('%05d', $serial++);
    return "APLOGNO($tag)";
}

sub note_tag
{
    my $file = shift;
    my $lineno = shift;
    my $tag = shift;
    my $msg = shift;

    my $oneline = "";
    while (length $msg) {
        $msg =~ s/^[\s\n]+//s;
        if ($msg =~ s{^"((?:\\"|[^"])+)"}{}) {
            $oneline .= $1;
        }
        if ($msg =~ s{^(\w+)}{}) {
            $oneline .= $1;
        }
    }
    if (exists $tags{$tag}) {
        print STDERR "WARNING: Duplicate tag $tag at $tags{$tag}->{file}:$tags{$tag}->{line} and $file:$lineno\n";
    }
    if ($tag >= $serial) {
        print STDERR "WARNING: next-number $serial inconsistent with tag $tag at $file:$lineno, adjusting\n";
        $serial = $tag + 1;
    }
    $tags{$tag} = { file => $file, line => $lineno, msg => $oneline };
}

sub write_file
{
    my $file = shift;
    my $data = shift;

    my $tmpname = "$file.$$.tmp";

    open(my $fh, ">", $tmpname) or die "open $tmpname: $!";
    print $fh $data or die "write $tmpname: $!";
    close($fh) or die "close $tmpname: $!";
    rename($tmpname, $file) or die "rename $tmpname -> $file: $!";
    print STDERR "Updated $file\n";
}

sub read_serial
{
    my $name = shift;
    open(my $fh, "<", $name) or die "can't open $name, need to be started in the top source dir";
    my $num = <$fh>;
    chomp $num;
    $num =~ /^\d+$/ or die "invalid serial in $name: $num";
    return $num;
}
