copyright-extractor revision 12636
#
# Script for extracting copyright and licensing information from source code
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License, Version 1.0 only
# (the "License"). You may not use this file except in compliance
# with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
use strict;
use warnings;
use Cwd;
# Print the usage summary: the synopsis line followed by a short
# description of every command-line option, all written to standard output.
# NOTE(review): no "sub ... {" line opens this body; only the closing
# brace is present -- confirm the subroutine header against upstream.
print "copyright-extractor [options] <source directory>\n";
print "\n";
print "Options:\n";
print " -r, --raw\n";
print " Print raw comments only, do not attempt to merge,\n";
print " only unify identical comments.\n";
print " -c, --copyright-first\n";
print " Attempt to move copyright statements to the start of the\n";
print " comment block.\n";
print " Note: when using this option, there is a chance that\n";
print " lines get mixed up if a copyright statement extends to\n";
print " more than one line.\n";
print " -g, --gpl\n";
# NOTE(review): the next sentence stops at "beginning of the"; the print
# statement that completes it appears to be missing.
print " Add the disclaimer about GPLv2 to the beginning of the\n";
print " -O, --omitted\n";
print " Print a list of files that were not checked\n";
print " -h, --help\n";
print " Print this usage information\n";
print " -d n, --debug=n\n";
print " Turn on debug output.\n";
}
# File-scoped state shared by the subroutines below.

# Text stored per file name; it is read back as $blurbs{$fname} when the
# comments are checked for GPL wording.
my %blurbs;
# Set to 1 as soon as one stored blurb matches the GPL/LGPL pattern.
my $gpl_found = 0;
# presumably the files that were skipped, reported under "The following
# files were not checked" -- TODO confirm where it is filled
my @files_omitted;
# The flags below all start switched off.
# presumably they mirror the -d, -r, -c, -g and -O options listed in the
# usage text -- TODO confirm against the option parsing code
my $debug = 0;
my $dumb_mode = 0;
my $copyright_first = 0;
my $gpl_disclaimer = 0;
my $print_omitted = 0;
# presumably the source directories named on the command line -- TODO confirm
my @dirs;
# Takes one value from the front of @_ (presumably a single non-option
# command-line argument -- TODO confirm).
# The only behaviour visible here is the failure path: print the
# "Try --help" hint and exit with status 1.
# NOTE(review): the condition guarding that path and the rest of the body
# appear to be missing; the closing braces below do not balance.
sub process_args {
my $arg = shift;
print "Try --help for usage.\n";
exit (1);
}
}
# NOTE(review): the body of process_options is empty, so calling it does
# nothing. Presumably the parsing of the options listed in the usage text
# belongs here -- confirm against upstream.
sub process_options {
}
# a very simple file type check based on the file name
# fname: the file name to classify
# Returns: one of the FTYPE_* constants (FTYPE_IGNORE, FTYPE_C, FTYPE_PERL,
# FTYPE_PYTHON, FTYPE_SHELL or FTYPE_JAVA)
# NOTE(review): neither the subroutine header nor the FTYPE_* definitions
# are present here.  As written, every return after the first one inside
# the "if" branch is unreachable; the elsif tests that selected between
# them appear to be missing.
my $fname = shift;
if ($fname =~ /([~]$|\/(ChangeLog|configure\.in|Makefile|ltmain\.sh|README|NEWS|INSTALL|HACKING|configure$|config\.)$)/) {
# some file names to ignore
return FTYPE_IGNORE;
# some more file names to ignore
return FTYPE_IGNORE;
return FTYPE_C;
return FTYPE_PERL;
return FTYPE_PYTHON;
return FTYPE_SHELL;
return FTYPE_JAVA;
} else {
# FIXME: could do something smart here
# anything that is not recognised is ignored
return FTYPE_IGNORE;
}
}
# return 1 if the string includes words that suggest that the string
# is some sort of legal text. If none of these words appear in the
# string, this program will ignore it and assume that it's some other
# comment that happens to be at the beginning of the file
# str: the text to test
# Returns: 1 when one of the keywords matches, 0 otherwise
# NOTE(review): the pattern has no /i modifier, so only lower-case
# spellings match unless the caller lower-cases str first -- confirm.
my $str = shift;
if ($str =~ /(licen[cs]|legal|terms|condition|copyright|rights|\(c\)|copying|usage|binary|distribut|gpl)/) {
return 1;
}
return 0;
}
# extract the comments
# fname: taken from the front of @_ (presumably the file to read --
# TODO confirm; the code that opens and reads it is not present here)
# Visible per-line handling: a line starting with "#!" is skipped, one
# leading "#" is removed, and a line that consists only of an emacs-style
# "-*- ... -*-" marker is blanked.
# NOTE(review): the subroutine header, the read loop and the code that
# builds $blurb appear to be missing; only closing braces remain.
my $fname = shift;
my $blurb;
my $line;
next if $line =~ /^#!/;
$line =~ s/^#//;
# delete certain types of comments, like emacs mode spec, etc
$line =~ s/^\s*-\*-.*-\*-\s*$//;
}
}
}
}
# Extract the leading comment text from a file that uses C-style comment
# syntax: the substitutions below strip "/*", "*/" and "//" markers.
# fname: taken from the front of @_ (presumably the file to read --
# TODO confirm; the code that opens and reads it is not present here)
# NOTE(review): the subroutine header, the read loop and most of the
# conditions appear to be missing, so the branch structure below cannot
# be fully reconstructed.
my $fname = shift;
my $blurb;
# reset to 0 next to the "*/" handling and set to 1 next to the "/*"
# handling; presumably tracks whether we are inside a /* ... */ block
my $in_comment_block = 0;
my $line;
# drop the comment terminator and everything after it
$line =~ s/\*\/.*//;
$in_comment_block = 0;
$line =~ s/^\/\///;
}
} else {
$in_comment_block = 1;
# drop the comment opener together with any leading whitespace
$line =~ s/^\s*\/\*//;
$line =~ s/^\s*\/\///;
# add to blurb if not the start of the blurb
} else {
# end of comments, stop processing
last;
}
}
# delete certain types of comments, like emacs mode spec, etc
$line =~ s/^\s*-\*-.*-\*-\s*$//;
# also blank a line that holds only "@-type@", optionally preceded by
# asterisks
$line =~ s/^\s*\**\s*\@-type\@\s*$//;
}
}
}
}
# forward declaration with a one-scalar prototype, so that calls to
# extract_comments can be parsed before the definition is complete
sub extract_comments($);
# process a directory or a file recursively: extract the comments
# from the beginning of each file and save them in %blurbs
# fname: taken from the front of @_, the directory or file to process
# NOTE(review): the "sub extract_comments" header, the directory walk and
# the code that sets $ftype appear to be missing.
my $fname = shift;
# directory -> process recursively
}
# regular file -> identify file type and read comments
# files classified as FTYPE_IGNORE are not read at all
return if $ftype == FTYPE_IGNORE;
}
}
}
# like uniq(1)
# Args: the list to process, passed in @_
# Returns: @uniq_list on the normal path; one earlier path returns the
# input list unchanged
# NOTE(review): the subroutine header, the loop that fills @uniq_list and
# the condition guarding the early return appear to be missing.
my @list = @_;
my $prev;
return @list;
}
}
return @uniq_list;
}
# return the number of lines in str
# str: taken from the front of @_
# NOTE(review): the subroutine header and the statement that counts and
# returns the lines appear to be missing; only the argument is read here.
my $str = shift;
}
# return 1 if str is a member of the list, 0 otherwise
# Args: str first, then the list elements (the remainder of @_)
# NOTE(review): the subroutine header and the loop/comparison guarding
# "return 1" appear to be missing.
my $str = shift;
my @list = @_;
return 1;
}
}
return 0;
}
# forward declaration (five scalar arguments) so that the recursive calls
# inside the body can be parsed
sub do_merge_comments ($$$$$);
# Merge two texts line by line; the function calls itself on the
# remaining lines after each step.
#
# Args: references to lists of strings (lines of the texts)
#
# ml1: lines from the first text already processed
# l1: remaining lines of the 1st text
# nl1: remaining normalised lines of the 1st text
# l2: remaining lines of the 2nd text
# nl2: remaining normalised lines of the 2nd text
#
# Return: list of merged lines
#
# NOTE(review): the "sub do_merge_comments" header, the loops, most of the
# conditions and the declarations of @mlines1, @lines1 and @lines2 appear
# to be missing; the braces below do not balance.
my $ml1_ref = shift;
my $l1_ref = shift;
my $nl1_ref = shift;
my $l2_ref = shift;
my $nl2_ref = shift;
my @nmlines1;
# work on copies of the normalised line lists
my @norm_lines1 = @$nl1_ref;
my @norm_lines2 = @$nl2_ref;
my @nmlines2;
my @mlines2;
my @merged_lines;
my $line1;
my $norm_line1;
my $line2;
my $norm_line2;
# debug trace of the three inputs
print "DEBUG: attempting to merge\n";
print "DEBUG: lines already processed from 1st text:\n";
print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
print "DEBUG: $l\n";
}
}
print "DEBUG: 1st text:\n";
print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
print "DEBUG: $l\n";
}
print "DEBUG: 2nd text:\n";
print "DEBUG: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n";
print "DEBUG: $l\n";
}
print "DEBUG: <<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
}
return @merged_lines;
}
return @merged_lines;
}
# first save the lines only appearing in lines1,
# stop at the first 2 common lines that are not empty
last;
} else {
}
}
# now save the lines appearing in lines2 before the common line
} else {
last;
}
}
my @common_lines;
my @ncommon_lines;
# now save the first common line
print "DEBUG: 1st common line:\n";
}
} else {
# no common lines were found
# lines1 should be empty, all lines moved to mlines1
return @merged_lines;
}
# save all common lines
print "DEBUG: no more common lines.\n";
}
last;
} else {
print "DEBUG: common line:\n";
}
}
}
# only merge if the number of common lines is at least 11
# or we are at the end of one of the texts or if at the
# beginning of the 2nd text
print "DEBUG: common lines:\n";
print "DEBUG: $l\n";
}
print "DEBUG: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n";
}
# first the lines from the 1st text
# then the lines from the 2nd text
# finally the common lines
} else {
# don't merge
# add the common lines to the processed part of the 1st text
# add the common lines back to the unprocessed part of the 2nd text
# add the lines before the common lines back to the unprocessed
# part of the 2nd text
# add the normalised common lines back to
# the unprocessed part of the 2nd text
# add the normalised lines before the common lines back to
# the unprocessed part of the 2nd text
# add the normalised common lines back to
# try to merge the rest of the texts
my @more_merged_lines = do_merge_comments (\@mlines1,
\@lines1, \@norm_lines1, \@lines2, \@norm_lines2);
return @merged_lines;
}
} else {
# repeat the process for the remaining lines
# start again with an empty "already processed" list
my @l1;
my @more_merged_lines = do_merge_comments (\@l1,
\@lines1, \@norm_lines1, \@lines2, \@norm_lines2);
}
return @merged_lines;
}
# Merge two texts and return the result as a single string.
# str1, str2: the two texts, taken from the front of @_
# Returns: $merged_str
# For comparison, each line is normalised: runs of whitespace collapse to
# one space and a leading or trailing space is removed.  The line-level
# work is delegated to do_merge_comments, called with an empty "already
# processed" list.
# NOTE(review): the subroutine header, the loops over the lines, the
# declarations of @lines1/@lines2 and the code that assembles $merged_str
# (including the use of @copyright_lines and @non_cr_lines) appear to be
# missing.
my $str1 = shift;
my $str2 = shift;
my @norm_lines1;
my @norm_lines2;
# ignore whitespace differences
$l1 =~ s/\s+/ /g;
$l1 =~ s/^ //g;
$l1 =~ s/ $//g;
}
# ignore whitespace differences
$l2 =~ s/\s+/ /g;
$l2 =~ s/^ //g;
$l2 =~ s/ $//g;
}
my @l0;
my @merged_lines = do_merge_comments (\@l0, \@lines1, \@norm_lines1,
\@lines2, \@norm_lines2);
my $merged_str;
my @copyright_lines;
my @non_cr_lines;
} else {
}
}
} else {
}
return $merged_str;
}
# Gather the stored blurbs, record whether any of them mentions the
# GPL/LGPL, try to merge pairs of comments, and finally print each comment
# followed by a separator line.
# NOTE(review): the subroutine header, the loops over %blurbs and over the
# comment pairs, and the code computing $c12_merged appear to be missing.
my @all_comments;
my %comments;
# case-insensitive match for "GPL", "LGPL" or the spelled-out
# GNU (Library/Lesser) General Public License
if ($blurbs{$fname} =~ /\b(gpl|lgpl|gnu\s+(library\s+|lesser\s+|)general\s+public\s+license)\b/si) {
$gpl_found = 1;
}
} else {
}
}
}
# the merge attempts work on a copy of the comment list
my @temp_all_comments = @all_comments;
my $i = 0;
my $did_merge = 0;
my $c1 = $temp_all_comments[$i];
my $c2 = $temp_all_comments[$j];
# if more than 10 lines or more than 25% saved then
# keep the merged comments
print "DEBUG*****************************************\n";
print "++++++++++++++++++++++++++++++++++++++++++++++\n";
print "==============================================\n";
print "*****************************************DEBUG\n";
}
$temp_all_comments[$j] = $c12_merged;
$did_merge = 1;
last;
}
}
}
$i++;
}
}
print << "__EOF"
--------------------------------------------------------------------
}
print $comment;
print "\n\n" .
"--------------------------------------------------------------------" .
"\n\n";
}
}
# Program entry point, invoked by the main() call on the last line.
# Visible behaviour: an error path that exits with status 1, and the
# heading printed before the list of files that were not checked.
# NOTE(review): the "sub main" header and nearly all of the body appear to
# be missing; only closing braces remain between the statements below.
my $srcdir;
exit (1);
}
}
}
}
print "\nThe following files were not checked:\n\n";
}
}
}
main();