Cross Reference: /solaris-desktop/scripts/copyright-extractor

12636N/A#!/usr/perl5/bin/perl
12636N/A#
12636N/A# Script for extracting copyright and licensing information from source code
12636N/A#
12636N/A# CDDL HEADER START
12636N/A#
12636N/A# The contents of this file are subject to the terms of the
12636N/A# Common Development and Distribution License, Version 1.0 only
12636N/A# (the "License").  You may not use this file except in compliance
12636N/A# with the License.
12636N/A#
12636N/A# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
12636N/A# or http://www.opensolaris.org/os/licensing.
12636N/A# See the License for the specific language governing permissions
12636N/A# and limitations under the License.
12636N/A#
12636N/A# When distributing Covered Code, include this CDDL HEADER in each
12636N/A# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
12636N/A# If applicable, add the following below this CDDL HEADER, with the
12636N/A# fields enclosed by brackets "[]" replaced with your own identifying
12636N/A# information: Portions Copyright [yyyy] [name of copyright owner]
12636N/A#
12636N/A# CDDL HEADER END
12636N/A#
12636N/A#
19915N/A# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
12636N/A#
12636N/A
12636N/Ause strict;
12636N/Ause warnings;
12636N/Ause Cwd;
12636N/Ause Getopt::Long qw(:config gnu_compat no_auto_abbrev bundling pass_through);
12636N/A
12701N/Amy $min_merge = 10;
12701N/A
12636N/Asub usage() {
12636N/A    print "copyright-extractor [options] <source directory>\n";
12636N/A    print "\n";
12636N/A    print "Options:\n";
12636N/A    print "  -r, --raw\n";
12636N/A    print "         Print raw comments only, do not attempt to merge,\n";
12636N/A    print "         only unify identical comments.\n";
12636N/A    print "  -c, --copyright-first\n";
12636N/A    print "         Attempt to move copyright statements to the start of the\n";
12636N/A    print "         comment block.\n";
12636N/A    print "         Note: when using this option, there is a chance that\n";
12636N/A    print "         lines get mixed up if a copyright statement extends to\n";
12636N/A    print "         more than one line.\n";
12701N/A    print "  -m n --min=n\n";
12701N/A    print "         only merge if there are at least n consecutive identical\n";
12701N/A    print "         lines.  default: $min_merge\n";
12636N/A    print "  -g, --gpl\n";
12636N/A    print "         Add the disclaimer about GPLv2 to the beginning of the\n";
12636N/A    print "         output if any of the comments look like GPL/LGPL\n";
12636N/A    print "  -O, --omitted\n";
12636N/A    print "         Print a list of files that were not checked\n";
12636N/A    print "  -h, --help\n";
12636N/A    print "         Print this usage information\n";
12636N/A    print "  -d n, --debug=n\n";
12636N/A    print "         Turn on debug output.\n";
12636N/A}
12636N/A
12636N/Amy %blurbs;
12636N/A
12636N/Amy $gpl_found = 0;
12636N/A
12636N/Amy @files_omitted;
12636N/Amy $debug = 0;
12636N/Amy $dumb_mode = 0;
12636N/Amy $copyright_first = 0;
12636N/Amy $gpl_disclaimer = 0;
12636N/Amy $print_omitted = 0;
12636N/A
12636N/Amy @dirs;
12636N/Asub process_args {
12636N/A    my $arg = shift;
12636N/A
12636N/A    if ($arg =~ /^-/) {
12636N/A    print "Unknown option: $arg\n";
12636N/A    print "Try --help for usage.\n";
12636N/A    exit (1);
12636N/A    }
12636N/A
12636N/A    push (@dirs, $arg);
12636N/A}
12636N/A
12636N/Asub process_options {
12636N/A
12636N/A    Getopt::Long::Configure ("bundling");
12636N/A
12636N/A    GetOptions ('d|debug=n' => sub { shift; $debug = shift; },
12701N/A        'm|min=n' => sub { shift; $min_merge = shift; },
12636N/A        'r|raw' => sub { $dumb_mode = 1; },
12636N/A        'c|copyright-first' => sub { $copyright_first = 1; },
12636N/A        'O|omitted' => sub { $print_omitted = 1; },
12636N/A        'g|gpl' => sub { $gpl_disclaimer = 1; },
12636N/A        'h|help' => sub { usage (); exit (0); },
12636N/A        '<>' => \&process_args);
12636N/A}
12636N/A
12636N/Ause constant FTYPE_IGNORE => 0;
12636N/Ause constant FTYPE_C => 1;
12636N/Ause constant FTYPE_PERL => 2;
12636N/Ause constant FTYPE_PYTHON => 3;
12636N/Ause constant FTYPE_SHELL => 4;
12636N/Ause constant FTYPE_JAVA => 5;
12636N/A
12636N/A# a very simple file type check based on the file name
12636N/A# fname: the file name to classify
12636N/A# Returns: one of the above contants
12636N/Asub get_file_type ($) {
12636N/A    my $fname = shift;
12636N/A
12636N/A    if ($fname =~ /([~]$|\/(ChangeLog|configure\.in|Makefile|ltmain\.sh|README|NEWS|INSTALL|HACKING|configure$|config\.)$)/) {
12636N/A    # some file names to ignore
12636N/A    push (@files_omitted, $fname);
12636N/A    return FTYPE_IGNORE;
12636N/A    } elsif ($fname =~ /\.(am|ac|o|lo|ps|la|cache|diff|out|log|guess|spec)$/) {
12636N/A    # some more file names to ignore
12636N/A    push (@files_omitted, $fname);
12636N/A    return FTYPE_IGNORE;
12636N/A    } elsif ($fname =~ /\.(c|h|hpp|cpp|C|CPP|cc|CC)$/) {
12636N/A    return FTYPE_C;
12636N/A    } elsif ($fname =~ /\.pl$/) {
12636N/A    return FTYPE_PERL;
12636N/A    } elsif ($fname =~ /\.py$/) {
12636N/A    return FTYPE_PYTHON;
12636N/A    } elsif ($fname =~ /\.(sh|ksh|csh)$/) {
12636N/A    return FTYPE_SHELL;
12636N/A    } elsif ($fname =~ /\.(java)$/) {
12636N/A    return FTYPE_JAVA;
12636N/A    } else {
12636N/A    # FIXME: could do something smart here
12636N/A    push (@files_omitted, $fname);
12636N/A    return FTYPE_IGNORE;
12636N/A    }
12636N/A}
12636N/A
12636N/A# return 1 if the string includes words that suggest that the string
12636N/A# is some sort of legal text.  If none of these words appear in the
12636N/A# string, this program will ignore it and assume that it's some other
12636N/A# comment that happens to be at the beginning of the file
12636N/Asub is_legalese ($) {
12636N/A    my $str = shift;
12636N/A
12636N/A    $str = lc ($str);
20126N/A    if ($str =~ /(licen[cs]|legal|terms|condition|copyright|rights|\(c\)|copying|distribut|gpl|gnu|source code|software|permission|authori)/) {
12636N/A    return 1;
12636N/A    }
12636N/A
12636N/A    return 0;
12636N/A}
12636N/A
12636N/A# extract the comments
12636N/Asub extract_comments_shell($) {
12636N/A    my $fname = shift;
12636N/A
12636N/A    my $blurb;
12636N/A    my $line;
12636N/A    open SRCFILE, "<$fname" or die "failed to open file $fname";
12636N/A    while ($line = <SRCFILE>) {
12636N/A    chomp ($line);
12636N/A    next if $line =~ /^#!/;
20126N/A    if ($line =~ /^[^#]/) {
20126N/A        # not a comment line
20126N/A        if (defined ($blurb)) {
20126N/A        if(is_legalese ($blurb)) {
20126N/A            if (defined($blurbs{$fname})) {
20126N/A            push(@{$blurbs{$fname}}, $blurb);
20126N/A            } else {
20126N/A            my @b = ($blurb);
20126N/A            $blurbs{$fname} = \@b;
20126N/A            }
20126N/A        }
20126N/A        }
20126N/A        $blurb = undef;
20126N/A        next;
20126N/A    }
20126N/A
12636N/A    $line =~ s/^#//;
12636N/A    # delete certain types of comments, like emacs mode spec, etc
12636N/A    $line =~ s/^\s*-\*-.*-\*-\s*$//;
12636N/A    $line =~ s/^\s\$Id:.*\$\s*$//;
12636N/A    $line =~ s/^\s*vim(:\S+=\S+)+\s*$//;
12636N/A
12636N/A    chomp ($line);
12636N/A
12636N/A    if (defined $blurb) {
12636N/A        $blurb = $blurb . "\n" . $line;
12636N/A    } elsif ($line ne '') {
12636N/A        $blurb = $line;
12636N/A    }
12636N/A    $line = undef;
12636N/A    }
12636N/A    close SRCFILE;
12636N/A
12636N/A    if (defined ($blurb) and is_legalese ($blurb)) {
20126N/A    if (defined ($blurbs{$fname})) {
20126N/A        push(@{$blurbs{$fname}}, $blurb);
20126N/A    } else {
20126N/A        my @b = ($blurb);
20126N/A        $blurbs{$fname} = \@b;
20126N/A    }
12636N/A    }
12636N/A}
12636N/A
12636N/Asub extract_comments_c($) {
12636N/A    my $fname = shift;
12636N/A
12636N/A    my $blurb;
12636N/A    my $in_comment_block = 0;
12636N/A    open SRCFILE, "<$fname" or die "failed to open file $fname";
12636N/A    my $line;
12636N/A    while ($line = <SRCFILE>) {
12636N/A    chomp ($line);
12636N/A    if ($in_comment_block) {
12636N/A        if ($line =~ /\*\//) {
12636N/A        $line =~ s/\*\/.*//;
12636N/A        $in_comment_block = 0;
12636N/A        } elsif ($line =~ /^\/\//) {
12636N/A        $line =~ s/^\/\///;
12636N/A        } elsif ($line =~ /^( \*|\*)/) {
12636N/A        $line =~ s/^( \*|\*)//;
12636N/A        }
12636N/A    } else {
19915N/A        if ($line =~ /^#(define|pragma)/ ) {
19915N/A        if ($line =~ /\/\//) {
19915N/A            $line =~ s/^.*\/\//\/\//;
19915N/A        } elsif ($line =~ /\/\*/) {
19915N/A            $line =~ s/^.*\/\*/\/\*/;
19915N/A        } else {
19915N/A            next;
19915N/A        }
19915N/A        }
12636N/A        if ($line =~ /^\s*\/\*(.*)\*\//) {
12636N/A        $line =~ s/^\s*\/\*(.*)\*\//$1/g;
12636N/A        } elsif ($line =~ /^\s*\/\*/) {
12636N/A        $in_comment_block = 1;
12636N/A        $line =~ s/^\s*\/\*//;
12636N/A        } elsif ($line =~ /^\/\//) {
12636N/A        $line =~ s/^\s*\/\///;
12636N/A        } elsif ($line eq '') {
12636N/A        # add to blurb if not the start of the blurb
12636N/A        } else {
20126N/A        # end of comment block
20126N/A        if (defined ($blurb)) {
20126N/A            if(is_legalese ($blurb)) {
20126N/A            if (defined($blurbs{$fname})) {
20126N/A                push(@{$blurbs{$fname}}, $blurb);
20126N/A            } else {
20126N/A                my @b = ($blurb);
20126N/A                $blurbs{$fname} = \@b;
20126N/A            }
20126N/A            }
20126N/A        }
20126N/A        $blurb = undef;
20126N/A        next;
12636N/A        }
12636N/A    }
12636N/A    # delete certain types of comments, like emacs mode spec, etc
12636N/A    $line =~ s/^\s*-\*-.*-\*-\s*$//;
12636N/A    $line =~ s/^\s*vim(:\S+=\S+)+\s*$//;
12636N/A    $line =~ s/^\s\$Id:.*\$\s*$//;
12636N/A    $line =~ s/^\s*\**\s*\\ingroup\s*.*$//;
12636N/A    $line =~ s/^\s*\**\s*\\file\s*.*$//;
12636N/A    $line =~ s/^\s*\**\s*\@-type\@\s*$//;
12636N/A
12636N/A    chomp ($line);
12636N/A
12636N/A    if (defined $blurb) {
12636N/A        $blurb = $blurb . "\n" . $line;
12636N/A    } elsif ($line ne '') {
12636N/A        $blurb = $line;
12636N/A    }
12636N/A    $line = undef;
12636N/A    }
12636N/A    close SRCFILE;
12636N/A    if (defined ($blurb) and is_legalese ($blurb)) {
20126N/A    if (defined ($blurbs{$fname})) {
20126N/A        push(@{$blurbs{$fname}}, $blurb);
20126N/A    } else {
20126N/A        my @b = ($blurb);
20126N/A        $blurbs{$fname} = \@b;
20126N/A    }
12636N/A    }
12636N/A}
12636N/A
13093N/Asub extract_comments_python($) {
13093N/A    my $fname = shift;
13093N/A
13093N/A    my $blurb;
13093N/A    my $in_comment_block = 0;
13093N/A    open SRCFILE, "<$fname" or die "failed to open file $fname";
13093N/A    my $line;
13093N/A    while ($line = <SRCFILE>) {
13093N/A    chomp ($line);
13093N/A    if ($in_comment_block) {
13093N/A        if ($line =~ /"""/) {
13093N/A        $line =~ s/"""//;
13093N/A        $in_comment_block = 0;
13093N/A        } elsif ($line =~ /#/) {
13093N/A        $line =~ s/^#//;
13093N/A        }
13093N/A    } else {
13093N/A        if ($line =~ /^\s*"""(.*)"""/) {
13093N/A        $line =~ s/^\s*"""(.*)"""/$1/g;
20126N/A        } elsif ($line =~ /^\s*r"""(.*)"""/) {
20126N/A        $line =~ s/^\s*r"""(.*)"""/$1/g;
13093N/A        } elsif ($line =~ /^\s*"""/) {
13093N/A        $in_comment_block = 1;
13093N/A        $line =~ s/^\s*"""//;
20126N/A        } elsif ($line =~ /^\s*r"""/) {
20126N/A        $in_comment_block = 1;
20126N/A        $line =~ s/^\s*r"""//;
13093N/A        } elsif ($line =~ /^\/\//) {
13093N/A        $line =~ s/^\s*"""//;
13093N/A        } elsif ($line eq '') {
13093N/A        # add to blurb if not the start of the blurb
13093N/A        } else {
20126N/A        # end of comment block
20126N/A        if (defined ($blurb)) {
20126N/A            if(is_legalese ($blurb)) {
20126N/A            if (defined($blurbs{$fname})) {
20126N/A                push(@{$blurbs{$fname}}, $blurb);
20126N/A            } else {
20126N/A                my @b = ($blurb);
20126N/A                $blurbs{$fname} = \@b;
20126N/A            }
20126N/A            }
20126N/A        }
20126N/A        $blurb = undef;
20126N/A        next;
13093N/A        }
13093N/A    }
13093N/A    # delete certain types of comments, like emacs mode spec, etc
13093N/A    $line =~ s/^\s*-\*-.*-\*-\s*$//;
13093N/A    $line =~ s/^\s*vim(:\S+=\S+)+\s*$//;
13093N/A    $line =~ s/^\s\$Id:.*\$\s*$//;
13093N/A
13093N/A    chomp ($line);
13093N/A
13093N/A    if (defined $blurb) {
13093N/A        $blurb = $blurb . "\n" . $line;
13093N/A    } elsif ($line ne '') {
13093N/A        $blurb = $line;
13093N/A    }
13093N/A    $line = undef;
13093N/A    }
13093N/A    close SRCFILE;
13093N/A    if (defined ($blurb) and is_legalese ($blurb)) {
20126N/A    if (defined ($blurbs{$fname})) {
20126N/A        push(@{$blurbs{$fname}}, $blurb);
20126N/A    } else {
20126N/A        my @b = ($blurb);
20126N/A        $blurbs{$fname} = \@b;
20126N/A    }
13093N/A    }
13093N/A}
13093N/A
12636N/Asub extract_comments($);
12636N/A
12636N/A# process a directory or a file recursively: extract the comments
20126N/A# from the beginning of each file and save them in %blurbs
12636N/Asub extract_comments($) {
12636N/A    my $fname = shift;
12636N/A    if (-d $fname) {
12636N/A    # directory -> process recursively
12636N/A    opendir(DIR, $fname) || die("Cannot open directory $fname");
12636N/A    my @thefiles= readdir(DIR);
12636N/A    closedir(DIR);
12636N/A    foreach my $f (@thefiles) {
12636N/A        next if $f eq '.';
12636N/A        next if $f eq '..';
12636N/A        next if $f eq '.libs';
12636N/A        next if $f eq 'intl';
12636N/A        extract_comments ("$fname/$f");
12636N/A    }
12636N/A    } elsif (-f $fname) {
12636N/A    # regular file -> identify file type and read comments
12636N/A    my $ftype = get_file_type ($fname);
12636N/A    return if $ftype == FTYPE_IGNORE;
12636N/A    if ($ftype == FTYPE_C) {
12636N/A        extract_comments_c ($fname);
12636N/A    } elsif ($ftype == FTYPE_PERL) {
12636N/A        extract_comments_shell ($fname);
12636N/A    } elsif ($ftype == FTYPE_SHELL) {
12636N/A        extract_comments_shell ($fname);
12651N/A    } elsif ($ftype == FTYPE_PYTHON) {
13093N/A        extract_comments_python ($fname);
12636N/A    } elsif ($ftype == FTYPE_JAVA) {
12636N/A        extract_comments_c ($fname);
12636N/A    }
12660N/A    } else {
12660N/A    print STDERR "ERROR: $fname: no such file or directory\n";
12636N/A    }
12636N/A}
12636N/A
12636N/A# like uniq(1)
12636N/Asub uniq (@) {
12636N/A    my @list = @_;
12636N/A    my $prev;
12636N/A    if (not @list) {
12636N/A    return @list;
12636N/A    }
12636N/A    $prev = $list[0];
12636N/A    my @uniq_list = ($prev);
12636N/A    foreach my $str (@list) {
12636N/A    next if $str eq $prev;
12636N/A    push (@uniq_list, $str);
12636N/A    $prev = $str;
12636N/A    }
12636N/A    return @uniq_list;
12636N/A}
12636N/A
12636N/A# return the number of lines in str
12636N/Asub line_count ($) {
12636N/A    my $str = shift;
12636N/A
12636N/A    return ($str =~ tr/\n//) + 1;
12636N/A}
12636N/A
12636N/A# return 1 if str is a member of the list, 0 otherwise
12636N/Asub is_member ($@) {
12636N/A    my $str = shift;
12636N/A    my @list = @_;
12636N/A
12636N/A    foreach my $s (@list) {
12636N/A    if ($str eq $s) {
12636N/A        return 1;
12636N/A    }
12636N/A    }
12636N/A
12636N/A    return 0;
12636N/A}
12636N/A
12636N/Asub do_merge_comments ($$$$$);
12636N/A
12636N/A# Args: references to lists of strings (lines of the texts)
12636N/A#
12636N/A# ml1: lines from the first text already processed
12636N/A# l1:  remaining lines of the 1st text
12636N/A# nl1: remaining normalised lines of the 1st text
12636N/A# l2:  remaining lines of the 2nd text
12636N/A# nl2: remaining normalised lines of the 1st text
12636N/A#
12636N/A# Return: list of merged lines
12636N/Asub do_merge_comments ($$$$$) {
12636N/A    my $ml1_ref = shift;
12636N/A    my $l1_ref = shift;
12636N/A    my $nl1_ref = shift;
12636N/A    my $l2_ref = shift;
12636N/A    my $nl2_ref = shift;
12636N/A
12636N/A    my @mlines1 = @$ml1_ref;
12636N/A    my @nmlines1;
12636N/A    my @lines1 = @$l1_ref;
12636N/A    my @norm_lines1 = @$nl1_ref;
12636N/A    my @lines2 = @$l2_ref;
12636N/A    my @norm_lines2 = @$nl2_ref;
12636N/A    my @nmlines2;
12636N/A    my @mlines2;
12636N/A
12636N/A    my @merged_lines;
12636N/A    my $line1;
12636N/A    my $norm_line1;
12636N/A    my $line2;
12636N/A    my $norm_line2;
12636N/A
12636N/A    if ($debug > 2) {
12636N/A    print "DEBUG: attempting to merge\n";
12636N/A    if (@mlines1) {
12636N/A        print "DEBUG: lines already processed from 1st text:\n";
12636N/A        print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
12636N/A        foreach my $l (@mlines1) {
12636N/A        print "DEBUG: $l\n";
12636N/A        }
12636N/A    }
12636N/A    print "DEBUG: 1st text:\n";
12636N/A    print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
12636N/A    foreach my $l (@lines1) {
12636N/A        print "DEBUG: $l\n";
12636N/A    }
12636N/A    print "DEBUG: 2nd text:\n";
12636N/A    print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
12636N/A    foreach my $l (@lines2) {
12636N/A        print "DEBUG: $l\n";
12636N/A    }
12636N/A    print "DEBUG: <<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
12636N/A    }
12636N/A
12636N/A    if (not @lines1) {
12636N/A    push (@merged_lines, @mlines1);
12636N/A    push (@merged_lines, @lines2);
12636N/A    return @merged_lines;
12636N/A    }
12636N/A
12636N/A    if (not @lines2) {
12636N/A    push (@merged_lines, @mlines1);
12636N/A    push (@merged_lines, @lines1);
12636N/A    return @merged_lines;
12636N/A    }
12636N/A
12636N/A    # first save the lines only appearing in lines1,
12636N/A    # stop at the first 2 common lines that are not empty
12636N/A    while (@lines1) {
12636N/A    $line1 = shift (@lines1);
12636N/A    $norm_line1 = shift (@norm_lines1);
12636N/A    if (($norm_line1 ne '') and
12636N/A        is_member ($norm_line1, @norm_lines2)) {
12636N/A        last;
12636N/A    } else {
12636N/A        push (@mlines1, $line1);
12636N/A        push (@nmlines1, $norm_line1);
12636N/A    }
12636N/A    }
12636N/A    # now save the lines appearing in lines2 before the common line
12636N/A    while (@lines2) {
12636N/A    $line2 = shift (@lines2);
12636N/A    $norm_line2 = shift (@norm_lines2);
12636N/A
12636N/A    if ($norm_line2 ne $norm_line1) {
12636N/A        push (@mlines2, $line2);
12636N/A        push (@nmlines2, $line2);
12636N/A    } else {
12636N/A            last;
12636N/A        }
12636N/A    }
12636N/A    my @common_lines;
12636N/A    my @ncommon_lines;
12636N/A    # now save the first common line
12636N/A    if ($norm_line1 eq $norm_line2) {
12636N/A    if ($debug > 3) {
12636N/A        print "DEBUG: 1st common line:\n";
12636N/A        print "DEBUG: $line1\n";
12636N/A    }
12636N/A    @common_lines = ($line1);
12636N/A    @ncommon_lines = ($norm_line2);
12636N/A    } else {
12636N/A    # no common lines were found
12636N/A    # lines1 should be empty, all lines moved to mlines1
12636N/A    push (@merged_lines, @mlines1);
12636N/A    push (@merged_lines, @mlines2);
12636N/A    return @merged_lines;
12636N/A    }
12636N/A    # save all common lines
12636N/A    while (@lines1 and @lines2) {
12636N/A    $line1 = shift (@lines1);
12636N/A    $norm_line1 = shift (@norm_lines1);
12636N/A    $line2 = shift (@lines2);
12636N/A    $norm_line2 = shift (@norm_lines2);
12636N/A    if ($norm_line1 ne $norm_line2) {
12636N/A        if ($debug > 3) {
12636N/A        print "DEBUG: no more common lines.\n";
12636N/A        }
12636N/A        unshift (@lines1, $line1);
12636N/A        unshift (@norm_lines1, $norm_line1);
12636N/A        unshift (@lines2, $line2);
12636N/A        unshift (@norm_lines2, $norm_line2);
12636N/A        last;
12636N/A    } else {
12636N/A        if ($debug > 3) {
12636N/A        print "DEBUG: common line:\n";
12636N/A        print "DEBUG: $line1\n";
12636N/A        }
12636N/A        push (@common_lines, $line1);
12636N/A        push (@ncommon_lines, $norm_line1);
12636N/A    }
12636N/A    }
12636N/A
12701N/A    # only merge if the number of common lines is at least $min_merge
12636N/A    # or we are at the end of one of the texts or if at the
12636N/A    # beginning of the 2nd text
12701N/A    if (($#common_lines >= $min_merge) or
12636N/A    (not @lines1) or (not @lines2) or
12636N/A    (not @mlines2)) {
12636N/A    if ($debug > 1) {
12636N/A        print "DEBUG: common lines:\n";
12636N/A        foreach my $l (@common_lines) {
12636N/A        print "DEBUG: $l\n";
12636N/A        }
12636N/A        print "DEBUG: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n";
12636N/A    }
12636N/A    # first the lines from the 1st text
12636N/A    push (@merged_lines, @mlines1);
12636N/A    # then the lines from the 2nd text
12636N/A    push (@merged_lines, @mlines2);
12636N/A    # finally the common lines
12636N/A    push (@merged_lines, @common_lines);
12636N/A    } else {
12636N/A    # don't merge
12636N/A
12636N/A    # add the common lines to the processed part of the 1st text
12636N/A    push (@mlines1, @common_lines);
12636N/A
12636N/A    # add the common lines back to the unprocessed part of the 2nd text
12636N/A    unshift (@lines2, @common_lines);
12636N/A    # add the lines before the common lines back to the unprocessed
12636N/A    # part of the 2nd text
12636N/A    unshift (@lines2, @mlines2);
12636N/A    # add the normalised common lines back to
12636N/A    # the unprocessed part of the 2nd text
12636N/A    unshift (@norm_lines2, @ncommon_lines);
12636N/A    # add the normalised lines before the common lines back to
12636N/A        # the unprocessed part of the 2nd text
12636N/A    unshift (@norm_lines2, @nmlines2);
12636N/A
12636N/A    # add the normalised common lines back to
12636N/A    # try to merge the rest of the texts
12636N/A    my @more_merged_lines = do_merge_comments (\@mlines1,
12636N/A        \@lines1, \@norm_lines1, \@lines2, \@norm_lines2);
12636N/A    push (@merged_lines, @more_merged_lines);
12636N/A    return @merged_lines;
12636N/A    }
12636N/A
12636N/A    if (not @lines1) {
12636N/A    push (@merged_lines, @lines2);
12636N/A    } elsif (not @lines2) {
12636N/A    push (@merged_lines, @lines1);
12636N/A    } else {
12636N/A    # repeat the process for the remaining lines
12636N/A    my @l1;
12636N/A    my @more_merged_lines = do_merge_comments (\@l1,
12636N/A        \@lines1, \@norm_lines1, \@lines2, \@norm_lines2);
12636N/A    push (@merged_lines, @more_merged_lines);
12636N/A    }
12636N/A
12636N/A    return @merged_lines;
12636N/A}
12636N/A
12636N/Asub merge_comments ($$) {
12636N/A    my $str1 = shift;
12636N/A    my $str2 = shift;
12636N/A    my @lines1 = split /\n/, $str1;
12636N/A    my @lines2 = split /\n/, $str2;
12636N/A    my @norm_lines1;
12636N/A    my @norm_lines2;
12636N/A
12636N/A    foreach my $l0 (@lines1) {
12636N/A    # ignore whitespace differences
12636N/A    my $l1 = "$l0";
12636N/A    $l1 =~ s/\s+/ /g;
12636N/A    $l1 =~ s/^ //g;
12636N/A    chomp ($l1);
12636N/A    $l1 =~ s/ $//g;
12636N/A    $l1 = lc ($l1);
12636N/A    push (@norm_lines1, $l1);
12636N/A    }
12636N/A    foreach my $l0 (@lines2) {
12636N/A    # ignore whitespace differences
12636N/A    my $l2 = "$l0";
12636N/A    $l2 =~ s/\s+/ /g;
12636N/A    $l2 =~ s/^ //g;
12636N/A    chomp ($l2);
12636N/A    $l2 =~ s/ $//g;
12636N/A    $l2 = lc ($l2);
12636N/A    push (@norm_lines2, $l2);
12636N/A    }
12636N/A
12636N/A    my @l0;
12636N/A    my @merged_lines = do_merge_comments (\@l0, \@lines1, \@norm_lines1,
12636N/A                      \@lines2, \@norm_lines2);
12636N/A    my $merged_str;
12636N/A    if ($copyright_first) {
12636N/A    my @copyright_lines;
12636N/A    my @non_cr_lines;
12636N/A
12636N/A    foreach my $line (@merged_lines) {
12636N/A        if ($line =~ /^\s*(copyright|\(c\)|©|author:|all rights reserved)/i) {
12636N/A        push (@copyright_lines, $line);
12636N/A        } else {
12636N/A        push (@non_cr_lines, $line);
12636N/A        }
12636N/A    }
12636N/A    @copyright_lines = sort (@copyright_lines);
12636N/A    @copyright_lines = uniq (@copyright_lines);
12636N/A    $merged_str = join ("\n", (@copyright_lines, @non_cr_lines));
12636N/A    } else {
12636N/A    $merged_str = join ("\n", @merged_lines);
12636N/A    }
12636N/A    return $merged_str;
12636N/A}
12636N/A
12636N/Amy @all_comments;
12636N/Amy %comments;
12636N/A
12636N/Asub unify_comments () {
12636N/A    foreach my $fname (keys %blurbs) {
20126N/A    foreach my $blurb (@{$blurbs{$fname}}) {
20126N/A        if ($blurb =~ /\b(gpl|lgpl|gnu\s+(library\s+|lesser\s+|)general\s+public\s+license)\b/si) {
20126N/A        # looks like GNU GPL/LGPL
20126N/A        $gpl_found = 1;
20126N/A        }
20126N/A        if (defined ($comments{$blurb})) {
20126N/A        $comments{$blurb} .= ", $fname";
20126N/A        } else {
20126N/A        $comments{$blurb} = $fname;
20126N/A        }
12636N/A    }
20126N/A    @all_comments = (keys %comments);
12636N/A    }
12636N/A}
12636N/A
12636N/Asub smart_merge_comments () {
12636N/A    my @temp_all_comments = @all_comments;
12636N/A    @all_comments = ();
12636N/A
12636N/A    my $i = 0;
12636N/A    while ($i <= $#temp_all_comments) {
12636N/A    my $did_merge = 0;
12636N/A    my $c1 = $temp_all_comments[$i];
12636N/A    for (my $j = $i+1; $j <= $#temp_all_comments; $j++) {
12636N/A        my $c2 = $temp_all_comments[$j];
12636N/A        my $c1_lc = line_count ($c1);
12636N/A        my $c2_lc = line_count ($c2);
12636N/A        my $c12_merged = merge_comments ($c1, $c2);
12636N/A        my $c12_lc = line_count ($c12_merged);
12636N/A        # if more than 10 lines or more than 25% saved then
12636N/A        # keep the merged comments
12636N/A        my $diff_lc = $c1_lc + $c2_lc - $c12_lc;
12636N/A        if (($diff_lc > 10) or ($c12_lc <= ($c1_lc + $c2_lc)*0.75)) {
12636N/A        if ($debug > 0) {
12636N/A            print "DEBUG*****************************************\n";
12636N/A            print "$c1\n";
12636N/A            print "++++++++++++++++++++++++++++++++++++++++++++++\n";
12636N/A            print "$c2\n";
12636N/A            print "==============================================\n";
12636N/A            print "$c12_merged\n";
12636N/A            print "*****************************************DEBUG\n";
12636N/A        }
12636N/A        $temp_all_comments[$j] = $c12_merged;
12636N/A        $did_merge = 1;
12636N/A        $comments{$c12_merged} = "$comments{$c1}, $comments{$c2}";
12636N/A        last;
12636N/A        }
12636N/A    }
12636N/A    if (not $did_merge) {
12636N/A        push (@all_comments, $c1);
12636N/A    }
12636N/A    $i++;
12636N/A    }
12636N/A}
12636N/A
12636N/Asub print_comments () {
12636N/A    if ($gpl_found and $gpl_disclaimer) {
12636N/A    print << "__EOF"
12636N/AFor the avoidance of doubt, except that if any license choice other
19125N/Athan GPL or LGPL is available it will apply instead, Oracle elects to
12636N/Ause only the General Public License version 2 (GPLv2) at this time
12636N/Afor any software where a choice of GPL license versions is made
12636N/Aavailable with the language indicating that GPLv2 or any later
12636N/Aversion may be used, or where a choice of which version of the GPL
12636N/Ais applied is otherwise unspecified.
12636N/A
12636N/A--------------------------------------------------------------------
12636N/A
12636N/A__EOF
12636N/A    }
12636N/A    foreach my $comment (@all_comments) {
12636N/A    print "$comments{$comment}:\n";
12636N/A    print $comment;
12636N/A    print "\n\n" .
12636N/A        "--------------------------------------------------------------------" .
12636N/A        "\n\n";
12636N/A    }
12636N/A}
12636N/A
12636N/Asub main() {
12636N/A    my $srcdir;
12636N/A
12636N/A    process_options ();
12636N/A
12636N/A    if (not @dirs) {
12636N/A    usage();
12636N/A    exit (1);
12636N/A    }
12636N/A
12636N/A    foreach my $srcdir (@dirs) {
12636N/A    if ($srcdir =~ /^\./) {
12636N/A        $srcdir = getcwd();
12636N/A    }
12636N/A    extract_comments ($srcdir);
12636N/A    }
12636N/A
12636N/A    unify_comments ();
12636N/A    if (not $dumb_mode) {
12636N/A    smart_merge_comments ();
12636N/A    }
12636N/A
12636N/A    print_comments ();
12636N/A
12636N/A    if ($print_omitted and @files_omitted) {
12636N/A    print "\nThe following files were not checked:\n\n";
12636N/A    foreach my $fname (@files_omitted) {
12636N/A        print "    $fname\n";
12636N/A    }
12636N/A    }
12636N/A}
12636N/A
12636N/Amain();