copyright-extractor revision 19915
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# Script for extracting copyright and licensing information from source code
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# CDDL HEADER START
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# The contents of this file are subject to the terms of the
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# Common Development and Distribution License, Version 1.0 only
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# (the "License"). You may not use this file except in compliance
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# with the License.
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# or http://www.opensolaris.org/os/licensing.
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# See the License for the specific language governing permissions
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# and limitations under the License.
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# When distributing Covered Code, include this CDDL HEADER in each
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# If applicable, add the following below this CDDL HEADER, with the
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# fields enclosed by brackets "[]" replaced with your own identifying
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# information: Portions Copyright [yyyy] [name of copyright owner]
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# CDDL HEADER END
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
7a4e3e29196e3abc1746714fcf93624edae89f93Lukas Slebodnikuse Getopt::Long qw(:config gnu_compat no_auto_abbrev bundling pass_through);
3996e391054a1c02ab62e1541ae21a8204bd5d0aAmitKumar print "copyright-extractor [options] <source directory>\n";
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek print " Print raw comments only, do not attempt to merge,\n";
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose print " Attempt to move copyright statements to the start of the\n";
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose print " Note: when using this option, there is a chance that\n";
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose print " lines get mixed up if a copyright statement extends to\n";
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose print " only merge if there are at least n consecutive identical\n";
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose print " Add the disclaimer about GPLv2 to the beginning of the\n";
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose print " output if any of the comments look like GPL/LGPL\n";
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose print " Print a list of files that were not checked\n";
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose GetOptions ('d|debug=n' => sub { shift; $debug = shift; },
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose 'c|copyright-first' => sub { $copyright_first = 1; },
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose# a very simple file type check based on the file name
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose# fname: the file name to classify
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose# Returns: one of the above constants
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose if ($fname =~ /([~]$|\/(ChangeLog|configure\.in|Makefile|ltmain\.sh|README|NEWS|INSTALL|HACKING|configure$|config\.)$)/) {
032d0980dfe5a27a5954f44f9d519e03fc7d1cedSumit Bose } elsif ($fname =~ /\.(am|ac|o|lo|ps|la|cache|diff|out|log|guess|spec)$/) {
9873e54960fcbd65fd2f6c7d3404f8a1256ae759Jakub Hrozek } elsif ($fname =~ /\.(c|h|hpp|cpp|C|CPP|cc|CC)$/) {
9873e54960fcbd65fd2f6c7d3404f8a1256ae759Jakub Hrozek# return 1 if the string includes words that suggest that the string
9873e54960fcbd65fd2f6c7d3404f8a1256ae759Jakub Hrozek# is some sort of legal text. If none of these words appear in the
9873e54960fcbd65fd2f6c7d3404f8a1256ae759Jakub Hrozek# string, this program will ignore it and assume that it's some other
9873e54960fcbd65fd2f6c7d3404f8a1256ae759Jakub Hrozek# comment that happens to be at the beginning of the file
9873e54960fcbd65fd2f6c7d3404f8a1256ae759Jakub Hrozek if ($str =~ /(licen[cs]|legal|terms|condition|copyright|rights|\(c\)|copying|usage|binary|distribut|gpl)/) {
9873e54960fcbd65fd2f6c7d3404f8a1256ae759Jakub Hrozek# extract the comments
852722ecb5dc09fc80cd3c837edb1cf6db529210Lukas Slebodnik open SRCFILE, "<$fname" or die "failed to open file $fname";
9873e54960fcbd65fd2f6c7d3404f8a1256ae759Jakub Hrozek # delete certain types of comments, like emacs mode spec, etc
9873e54960fcbd65fd2f6c7d3404f8a1256ae759Jakub Hrozek $line =~ s/^\s*-\*-.*-\*-\s*$//;
9873e54960fcbd65fd2f6c7d3404f8a1256ae759Jakub Hrozek $line =~ s/^\s\$Id:.*\$\s*$//;
852722ecb5dc09fc80cd3c837edb1cf6db529210Lukas Slebodnik if (defined ($blurb) and is_legalese ($blurb)) {
852722ecb5dc09fc80cd3c837edb1cf6db529210Lukas Slebodnik open SRCFILE, "<$fname" or die "failed to open file $fname";
9873e54960fcbd65fd2f6c7d3404f8a1256ae759Jakub Hrozek $line =~ s/\*\/.*//;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^.*\/\//\/\//;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^.*\/\*/\/\*/;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^\s*\/\*//;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^\s*\/\///;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher # add to blurb if not the start of the blurb
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher # delete certain types of comments, like emacs mode spec, etc
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^\s*-\*-.*-\*-\s*$//;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^\s\$Id:.*\$\s*$//;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^\s*\**\s*\\ingroup\s*.*$//;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^\s*\**\s*\\file\s*.*$//;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^\s*\**\s*\@-type\@\s*$//;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher if (defined ($blurb) and is_legalese ($blurb)) {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher open SRCFILE, "<$fname" or die "failed to open file $fname";
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher } elsif ($line eq '') {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher # add to blurb if not the start of the blurb
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher # end of comments, stop processing
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher # delete certain types of comments, like emacs mode spec, etc
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^\s*-\*-.*-\*-\s*$//;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^\s*vim(:\S+=\S+)+\s*$//;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher $line =~ s/^\s\$Id:.*\$\s*$//;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher if (defined $blurb) {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher } elsif ($line ne '') {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher close SRCFILE;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher if (defined ($blurb) and is_legalese ($blurb)) {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallaghersub extract_comments($);
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# process a directory or a file recursively: extract the comments
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher# from the beginning of each file and save them in @blurbs
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallaghersub extract_comments($) {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher # directory -> process recursively
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher opendir(DIR, $fname) || die("Cannot open directory $fname");
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher my @thefiles= readdir(DIR);
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher closedir(DIR);
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher foreach my $f (@thefiles) {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher next if $f eq '.';
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher next if $f eq '..';
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher next if $f eq '.libs';
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher next if $f eq 'intl';
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher } elsif (-f $fname) {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher # regular file -> identify file type and read comments
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher my $ftype = get_file_type ($fname);
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher return if $ftype == FTYPE_IGNORE;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher if ($ftype == FTYPE_C) {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher extract_comments_c ($fname);
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher } elsif ($ftype == FTYPE_PERL) {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher extract_comments_shell ($fname);
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher } elsif ($ftype == FTYPE_SHELL) {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher extract_comments_shell ($fname);
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher } elsif ($ftype == FTYPE_PYTHON) {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher extract_comments_python ($fname);
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher } elsif ($ftype == FTYPE_JAVA) {
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher extract_comments_c ($fname);
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher print STDERR "ERROR: $fname: no such file or directory\n";
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher my @list = @_;
dbd09f5703d721a58210e490609cfacb7eb56fcfStephen Gallagher if (not @list) {
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher my @uniq_list = ($prev);
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher foreach my $str (@list) {
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher push (@uniq_list, $str);
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher return @uniq_list;
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher# return the number of lines in str
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallaghersub line_count ($) {
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher return ($str =~ tr/\n//) + 1;
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher# return 1 if str is a member of the list, 0 otherwise
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallaghersub is_member ($@) {
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher my @list = @_;
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher foreach my $s (@list) {
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallaghersub do_merge_comments ($$$$$);
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher# Args: references to lists of strings (lines of the texts)
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher# ml1: lines from the first text already processed
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher# l1: remaining lines of the 1st text
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher# nl1: remaining normalised lines of the 1st text
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher# l2: remaining lines of the 2nd text
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher# nl2: remaining normalised lines of the 2nd text
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher# Return: list of merged lines
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallaghersub do_merge_comments ($$$$$) {
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher my @mlines1 = @$ml1_ref;
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher my @lines1 = @$l1_ref;
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher my @norm_lines1 = @$nl1_ref;
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher my @lines2 = @$l2_ref;
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher my @norm_lines2 = @$nl2_ref;
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher my @merged_lines;
fa7921c8259539b750f7e9e7bcd82aa72020826aJakub Hrozek if (@mlines1) {
fa7921c8259539b750f7e9e7bcd82aa72020826aJakub Hrozek print "DEBUG: lines already processed from 1st text:\n";
fa7921c8259539b750f7e9e7bcd82aa72020826aJakub Hrozek print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
fa7921c8259539b750f7e9e7bcd82aa72020826aJakub Hrozek foreach my $l (@mlines1) {
db37eca433a5530975422b985b024a0e6ddbdc17Lukas Slebodnik print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
db37eca433a5530975422b985b024a0e6ddbdc17Lukas Slebodnik foreach my $l (@lines1) {
db37eca433a5530975422b985b024a0e6ddbdc17Lukas Slebodnik print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
db37eca433a5530975422b985b024a0e6ddbdc17Lukas Slebodnik foreach my $l (@lines2) {
db37eca433a5530975422b985b024a0e6ddbdc17Lukas Slebodnik print "DEBUG: <<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
db37eca433a5530975422b985b024a0e6ddbdc17Lukas Slebodnik if (not @lines1) {
db37eca433a5530975422b985b024a0e6ddbdc17Lukas Slebodnik push (@merged_lines, @mlines1);
db37eca433a5530975422b985b024a0e6ddbdc17Lukas Slebodnik push (@merged_lines, @lines2);
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher return @merged_lines;
ca92350db6ad6ac344181f7b8ec695eda29da675Stephen Gallagher if (not @lines2) {
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek push (@merged_lines, @mlines1);
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek push (@merged_lines, @lines1);
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek return @merged_lines;
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek # first save the lines only appearing in lines1,
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek # stop at the first 2 common lines that are not empty
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek while (@lines1) {
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek $line1 = shift (@lines1);
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek $norm_line1 = shift (@norm_lines1);
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek if (($norm_line1 ne '') and
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek is_member ($norm_line1, @norm_lines2)) {
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek push (@mlines1, $line1);
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek push (@nmlines1, $norm_line1);
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek # now save the lines appearing in lines2 before the common line
f3d91181d4ee9da3f8bbf4ddf8782951c0ae46c1Jakub Hrozek while (@lines2) {
f15683b4b100351e24e305d25bd4785c79ac8f55Sumit Bose $line2 = shift (@lines2);
f15683b4b100351e24e305d25bd4785c79ac8f55Sumit Bose $norm_line2 = shift (@norm_lines2);
58dee4047788964ed4b0f6c5d6512967f390ac21Lukas Slebodnik push (@mlines2, $line2);
58dee4047788964ed4b0f6c5d6512967f390ac21Lukas Slebodnik push (@nmlines2, $line2);
58dee4047788964ed4b0f6c5d6512967f390ac21Lukas Slebodnik my @common_lines;
f15683b4b100351e24e305d25bd4785c79ac8f55Sumit Bose my @ncommon_lines;
f15683b4b100351e24e305d25bd4785c79ac8f55Sumit Bose # now save the first common line
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek @common_lines = ($line1);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek @ncommon_lines = ($norm_line2);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # no common lines were found
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # lines1 should be empty, all lines moved to mlines1
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek push (@merged_lines, @mlines1);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek push (@merged_lines, @mlines2);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek return @merged_lines;
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # save all common lines
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek while (@lines1 and @lines2) {
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek $line1 = shift (@lines1);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek $norm_line1 = shift (@norm_lines1);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek $line2 = shift (@lines2);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek $norm_line2 = shift (@norm_lines2);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek unshift (@lines1, $line1);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek unshift (@norm_lines1, $norm_line1);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek unshift (@lines2, $line2);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek unshift (@norm_lines2, $norm_line2);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek push (@common_lines, $line1);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek push (@ncommon_lines, $norm_line1);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # only merge if the number of common lines is at least $min_merge
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # or we are at the end of one of the texts or if at the
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # beginning of the 2nd text
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek if (($#common_lines >= $min_merge) or
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek (not @lines1) or (not @lines2) or
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek (not @mlines2)) {
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek foreach my $l (@common_lines) {
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek print "DEBUG: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n";
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # first the lines from the 1st text
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek push (@merged_lines, @mlines1);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # then the lines from the 2nd text
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek push (@merged_lines, @mlines2);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # finally the common lines
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek push (@merged_lines, @common_lines);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # don't merge
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # add the common lines to the processed part of the 1st text
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek push (@mlines1, @common_lines);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # add the common lines back to the unprocessed part of the 2nd text
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek unshift (@lines2, @common_lines);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # add the lines before the common lines back to the unprocessed
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # part of the 2nd text
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek unshift (@lines2, @mlines2);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # add the normalised common lines back to
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # the unprocessed part of the 2nd text
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek unshift (@norm_lines2, @ncommon_lines);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # add the normalised lines before the common lines back to
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # the unprocessed part of the 2nd text
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek unshift (@norm_lines2, @nmlines2);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # add the normalised common lines back to
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek # try to merge the rest of the texts
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek my @more_merged_lines = do_merge_comments (\@mlines1,
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek \@lines1, \@norm_lines1, \@lines2, \@norm_lines2);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek push (@merged_lines, @more_merged_lines);
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek return @merged_lines;
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek if (not @lines1) {
dbea04f585a30d001b574317c068cd03a4fa332bJakub Hrozek push (@merged_lines, @lines2);
5a70b84cb66fb8c7a3fce0e3f2e4b61e0b2ea9d4Simo Sorce } elsif (not @lines2) {
5a70b84cb66fb8c7a3fce0e3f2e4b61e0b2ea9d4Simo Sorce push (@merged_lines, @lines1);
5a70b84cb66fb8c7a3fce0e3f2e4b61e0b2ea9d4Simo Sorce # repeat the process for the remaining lines
5a70b84cb66fb8c7a3fce0e3f2e4b61e0b2ea9d4Simo Sorce my @more_merged_lines = do_merge_comments (\@l1,
5a70b84cb66fb8c7a3fce0e3f2e4b61e0b2ea9d4Simo Sorce \@lines1, \@norm_lines1, \@lines2, \@norm_lines2);
5a70b84cb66fb8c7a3fce0e3f2e4b61e0b2ea9d4Simo Sorce push (@merged_lines, @more_merged_lines);
5a70b84cb66fb8c7a3fce0e3f2e4b61e0b2ea9d4Simo Sorce return @merged_lines;
5a70b84cb66fb8c7a3fce0e3f2e4b61e0b2ea9d4Simo Sorcesub merge_comments ($$) {
5a70b84cb66fb8c7a3fce0e3f2e4b61e0b2ea9d4Simo Sorce my @lines1 = split /\n/, $str1;
5a70b84cb66fb8c7a3fce0e3f2e4b61e0b2ea9d4Simo Sorce my @lines2 = split /\n/, $str2;
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce my @norm_lines1;
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce my @norm_lines2;
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce foreach my $l0 (@lines1) {
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce # ignore whitespace differences
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce $l1 =~ s/\s+/ /g;
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce push (@norm_lines1, $l1);
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce foreach my $l0 (@lines2) {
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce # ignore whitespace differences
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce $l2 =~ s/\s+/ /g;
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce push (@norm_lines2, $l2);
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce my @merged_lines = do_merge_comments (\@l0, \@lines1, \@norm_lines1,
8df169fdffb564ec932fede4216a123a71f1cc9aSimo Sorce \@lines2, \@norm_lines2);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek my @copyright_lines;
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek my @non_cr_lines;
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek foreach my $line (@merged_lines) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek if ($line =~ /^\s*(copyright|\(c\)|©|author:|all rights reserved)/i) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek push (@copyright_lines, $line);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek push (@non_cr_lines, $line);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek @copyright_lines = sort (@copyright_lines);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek @copyright_lines = uniq (@copyright_lines);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek $merged_str = join ("\n", (@copyright_lines, @non_cr_lines));
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozekmy @all_comments;
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozekmy %comments;
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozeksub unify_comments () {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek foreach my $fname (keys %blurbs) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek if ($blurbs{$fname} =~ /\b(gpl|lgpl|gnu\s+(library\s+|lesser\s+|)general\s+public\s+license)\b/si) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek # looks like GNU GPL/LGPL
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek if (defined ($comments{$blurbs{$fname}})) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek $comments{$blurbs{$fname}} = $comments{$blurbs{$fname}} .
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek @all_comments = (keys %comments);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozeksub smart_merge_comments () {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek my @temp_all_comments = @all_comments;
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek @all_comments = ();
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek while ($i <= $#temp_all_comments) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek for (my $j = $i+1; $j <= $#temp_all_comments; $j++) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek my $c1_lc = line_count ($c1);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek my $c2_lc = line_count ($c2);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek my $c12_merged = merge_comments ($c1, $c2);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek my $c12_lc = line_count ($c12_merged);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek # if more than 10 lines or more than 25% saved then
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek # keep the merged comments
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek if (($diff_lc > 10) or ($c12_lc <= ($c1_lc + $c2_lc)*0.75)) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek print "DEBUG*****************************************\n";
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek print "++++++++++++++++++++++++++++++++++++++++++++++\n";
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek print "==============================================\n";
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek print "*****************************************DEBUG\n";
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek $comments{$c12_merged} = "$comments{$c1}, $comments{$c2}";
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek push (@all_comments, $c1);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozeksub print_comments () {
e3c99ae355408933b03357220f3db09423bd40ddJakub HrozekFor the avoidance of doubt, except that if any license choice other
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozekthan GPL or LGPL is available it will apply instead, Oracle elects to
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozekuse only the General Public License version 2 (GPLv2) at this time
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozekfor any software where a choice of GPL license versions is made
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozekavailable with the language indicating that GPLv2 or any later
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozekversion may be used, or where a choice of which version of the GPL
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozekis applied is otherwise unspecified.
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek--------------------------------------------------------------------
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek foreach my $comment (@all_comments) {
9959c512ac3ba36f7a0db7614f0357ce0bae748fJakub Hrozek "--------------------------------------------------------------------" .
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek process_options ();
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek if (not @dirs) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek foreach my $srcdir (@dirs) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek if ($srcdir =~ /^\./) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek extract_comments ($srcdir);
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek unify_comments ();
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek smart_merge_comments ();
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek print_comments ();
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek if ($print_omitted and @files_omitted) {
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek print "\nThe following files were not checked:\n\n";
e3c99ae355408933b03357220f3db09423bd40ddJakub Hrozek foreach my $fname (@files_omitted) {