# SpellCheck.py revision f3a07f941f1be263a48a040c059edbcb722cf436
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2016 Joyent, Inc.
#
import re, sys
# Message templates written for each hit.  Both are %-formatted with
# (filename, line number, offending text, suggested replacement).
# spellMsg is used for entries from the misspellings table, altMsg for
# entries from the alternates table.
spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
altMsg = '%s: Line %d contains "%s"; please use "%s" instead for consistency with other documentation\n'
# Table of common misspellings: each key is the misspelled word (or doubled
# phrase such as 'the the') and each value is the text suggested in its
# place.  A value may list several comma-separated candidates when the
# intended word is ambiguous.  Further down in this file every key is
# interpolated into a case-insensitive regular expression anchored with \b
# on both sides, so keys are matched as whole words regardless of case.
misspellings = {
'absense': 'absence',
'accessable': 'accessible',
'accomodate': 'accommodate',
'accomodation': 'accommodation',
'accross': 'across',
'acheive': 'achieve',
'addional': 'additional',
'addres': 'address',
'admininistrative': 'administrative',
'adminstered': 'administered',
'adminstrate': 'administrate',
'adminstration': 'administration',
'adminstrative': 'administrative',
'adminstrator': 'administrator',
'admissability': 'admissibility',
'adress': 'address',
'adressable': 'addressable',
'adressed': 'addressed',
'adressing': 'addressing, dressing',
'aginst': 'against',
'agression': 'aggression',
'agressive': 'aggressive',
'alot': 'a lot, allot',
'and and': 'and',
'apparantly': 'apparently',
'appearence': 'appearance',
'arguement': 'argument',
'assasination': 'assassination',
'auxilliary': 'auxiliary',
'basicly': 'basically',
'begining': 'beginning',
'belive': 'believe',
'beteen': 'between',
'betwen': 'between',
'beween': 'between',
'bewteen': 'between',
'bizzare': 'bizarre',
'buisness': 'business',
'calender': 'calendar',
'cemetary': 'cemetery',
'chauffer': 'chauffeur',
'collegue': 'colleague',
'comming': 'coming',
'commited': 'committed',
'commitee': 'committee',
'commiting': 'committing',
'comparision': 'comparison',
'comparisions': 'comparisons',
'compatability': 'compatibility',
'compatable': 'compatible',
'compatablity': 'compatibility',
'compatiable': 'compatible',
'compatiblity': 'compatibility',
'completly': 'completely',
'concious': 'conscious',
'condidtion': 'condition',
'conected': 'connected',
'conjuction': 'conjunction',
'continous': 'continuous',
'curiousity': 'curiosity',
'deamon': 'daemon',
'definately': 'definitely',
'desireable': 'desirable',
'diffrent': 'different',
'dilemna': 'dilemma',
'dissapear': 'disappear',
'dissapoint': 'disappoint',
'ecstacy': 'ecstasy',
'embarass': 'embarrass',
'enviroment': 'environment',
'exept': 'except',
'existance': 'existence',
'familar': 'familiar',
'finaly': 'finally',
'folowing': 'following',
'foriegn': 'foreign',
'forseeable': 'foreseeable',
'fourty': 'forty',
'foward': 'forward',
'freind': 'friend',
'futher': 'further',
'gaurd': 'guard',
'glamourous': 'glamorous',
'goverment': 'government',
'happend': 'happened',
'harrassment': 'harassment',
'hierachical': 'hierarchical',
'hierachies': 'hierarchies',
'hierachy': 'hierarchy',
'hierarcical': 'hierarchical',
'hierarcy': 'hierarchy',
'honourary': 'honorary',
'humourous': 'humorous',
'idiosyncracy': 'idiosyncrasy',
'immediatly': 'immediately',
'inaccessable': 'inaccessible',
'inbetween': 'between',
'incidently': 'incidentally',
'independant': 'independent',
'infomation': 'information',
'interupt': 'interrupt',
'intial': 'initial',
'intially': 'initially',
'irresistable': 'irresistible',
'jist': 'gist',
'knowlege': 'knowledge',
'lenght': 'length',
'liase': 'liaise',
'liason': 'liaison',
'libary': 'library',
'maching': 'machine, marching, matching',
'millenia': 'millennia',
'millenium': 'millennium',
'neccessary': 'necessary',
'negotation': 'negotiation',
'nontheless': 'nonetheless',
'noticable': 'noticeable',
'occassion': 'occasion',
'occassional': 'occasional',
'occassionally': 'occasionally',
'occurance': 'occurrence',
'occured': 'occurred',
'occurence': 'occurrence',
'occuring': 'occurring',
'ommision': 'omission',
'orginal': 'original',
'orginally': 'originally',
'pavillion': 'pavilion',
'peice': 'piece',
'persistant': 'persistent',
'politican': 'politician',
'posession': 'possession',
'possiblity': 'possibility',
'preceed': 'precede',
'preceeded': 'preceded',
'preceeding': 'preceding',
'preceeds': 'precedes',
'prefered': 'preferred',
'prefering': 'preferring',
'presense': 'presence',
'proces': 'process',
'propoganda': 'propaganda',
'psuedo': 'pseudo',
'publically': 'publicly',
'realy': 'really',
'reciept': 'receipt',
'recieve': 'receive',
'recieved': 'received',
'reciever': 'receiver',
'recievers': 'receivers',
'recieves': 'receives',
'recieving': 'receiving',
'recomend': 'recommend',
'recomended': 'recommended',
'recomending': 'recommending',
'recomends': 'recommends',
'recurse': 'recur',
'recurses': 'recurs',
'recursing': 'recurring',
'refered': 'referred',
'refering': 'referring',
'religous': 'religious',
'rember': 'remember',
'remeber': 'remember',
'repetion': 'repetition',
'reponsible': 'responsible',
'resistence': 'resistance',
'retreive': 'retrieve',
'seige': 'siege',
'sence': 'since',
'seperate': 'separate',
'seperated': 'separated',
'seperately': 'separately',
'seperates': 'separates',
'similiar': 'similar',
'somwhere': 'somewhere',
'sould': 'could, should, sold, soul',
'sturcture': 'structure',
'succesful': 'successful',
'succesfully': 'successfully',
'successfull': 'successful',
'sucessful': 'successful',
'supercede': 'supersede',
'supress': 'suppress',
'supressed': 'suppressed',
'suprise': 'surprise',
'suprisingly': 'surprisingly',
'sytem': 'system',
'tendancy': 'tendency',
'the the': 'the',
'the these': 'these',
'therefor': 'therefore',
'threshhold': 'threshold',
'tolerence': 'tolerance',
'tommorow': 'tomorrow',
'tommorrow': 'tomorrow',
'tounge': 'tongue',
'tranformed': 'transformed',
'transfered': 'transferred',
'truely': 'truly',
'trustworthyness': 'trustworthiness',
'unforseen': 'unforeseen',
'unfortunatly': 'unfortunately',
'unsuccessfull': 'unsuccessful',
'untill': 'until',
'upto': 'up to',
'whereever': 'wherever',
'wich': 'which',
'wierd': 'weird',
'wtih': 'with',
}
# Spellings that are reported with altMsg rather than spellMsg: the key is
# the form found in the text, the value is the form to use instead.
alternates = {
    'parseable': 'parsable',
    'sub-command': 'subcommand',
    'sub-commands': 'subcommands',
    'writeable': 'writable',
}
# Pre-compile one case-insensitive, whole-word (\b ... \b) regular expression
# per table entry, once at import time, so that scanning a line only has to
# run searches.  Each list element is a tuple of
# (compiled regex, text to report, suggested replacement).
#
# items() is used instead of iteritems() because it is available on both
# Python 2 and Python 3; iteritems() does not exist on Python 3 dicts and
# would make this module fail at import there.
misspellingREs = [
    (re.compile(r'\b%s\b' % (wrong), re.IGNORECASE), wrong, right)
    for wrong, right in misspellings.items()
]
alternateREs = [
    (re.compile(r'\b%s\b' % (wrong), re.IGNORECASE), wrong, right)
    for wrong, right in alternates.items()
]
def check(errmsg, output, filename, line, lineno, entry):
    """Test one line of text against one table entry and report a hit.

    errmsg   -- %-format template taking (filename, lineno, found, wanted)
    output   -- object with a write() method that receives the message
    filename -- name to show in the message
    line     -- the text to search
    lineno   -- line number to show in the message
    entry    -- sequence of (compiled regex, found text, suggested text)

    Returns 1 if the regex matched anywhere in the line (after writing the
    formatted message to output), otherwise 0.
    """
    pattern = entry[0]
    if not pattern.search(line):
        return 0

    found, wanted = entry[1], entry[2]
    output.write(errmsg % (filename, lineno, found, wanted))
    return 1
def spellcheck(fh, filename=None, output=sys.stderr, **opts):
    """Scan every line of fh for known misspellings and alternate spellings.

    fh       -- seekable file-like object; it is rewound to offset 0 first
    filename -- name used in messages; falls back to fh.name when not given
    output   -- object with a write() method that receives the messages
    opts     -- accepted but not used by this function

    Returns 1 if any line produced at least one message, otherwise 0.
    """
    if not filename:
        filename = fh.name

    fh.seek(0)

    found = 0
    # Line numbers in messages are 1-based.
    for lineno, line in enumerate(fh, 1):
        # For each line, all misspelling entries are tried first, then all
        # alternate-spelling entries; every matching entry is reported.
        for msg, table in ((spellMsg, misspellingREs),
                           (altMsg, alternateREs)):
            for entry in table:
                found |= check(msg, output, filename, line, lineno, entry)
    return found