76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassYUI.add('text-wordbreak', function(Y) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
/**
 * Provides utility methods for splitting strings on word breaks and determining
 * whether a character index represents a word boundary.
 *
 * @module text
 * @submodule text-wordbreak
 */
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
/**
 * <p>
 * Provides utility methods for splitting strings on word breaks and determining
 * whether a character index represents a word boundary, using the generic word
 * breaking algorithm defined in the Unicode Text Segmentation guidelines
 * (<a href="http://unicode.org/reports/tr29/#Word_Boundaries">Unicode Standard
 * Annex #29</a>).
 * </p>
 *
 * <p>
 * This algorithm provides a reasonable default for many languages. However, it
 * does not cover language- or context-specific requirements, and it does not
 * provide meaningful results at all for languages that don't use spaces between
 * words, such as Chinese, Japanese, Thai, Lao, Khmer, and others. Server-based
 * word breaking services usually provide significantly better results with
 * better performance.
 * </p>
 *
 * @class Text.WordBreak
 * @static
 */
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glassvar Text = Y.Text,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass WBData = Text.Data.WordBreak,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass// Constants representing code point classifications.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassALETTER = 0,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassMIDNUMLET = 1,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassMIDLETTER = 2,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassMIDNUM = 3,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassNUMERIC = 4,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassCR = 5,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassLF = 6,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassNEWLINE = 7,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassEXTEND = 8,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassFORMAT = 9,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassKATAKANA = 10,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassEXTENDNUMLET = 11,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassOTHER = 12,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass// RegExp objects generated from code point data. Each regex matches a single
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass// character against a set of Unicode code points. The index of each item in
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass// this array must match its corresponding code point constant value defined
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass// above.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassSETS = [
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.aletter),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.midnumlet),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.midletter),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.midnum),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.numeric),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.cr),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.lf),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.newline),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.extend),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.format),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.katakana),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass new RegExp(WBData.extendnumlet)
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass],
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassEMPTY_STRING = '',
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassPUNCTUATION = new RegExp('^' + WBData.punctuation + '$'),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassWHITESPACE = /\s/,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassWordBreak = {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // -- Public Static Methods ------------------------------------------------
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass /**
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * Splits the specified string into an array of individual words.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass *
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @method getWords
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @param {String} string String to split.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @param {Object} options (optional) Options object containing zero or more
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * of the following properties:
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass *
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <dl>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <dt>ignoreCase (Boolean)</dt>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <dd>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * If <code>true</code>, the string will be converted to lowercase
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * before being split. Default is <code>false</code>.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * </dd>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass *
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <dt>includePunctuation (Boolean)</dt>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <dd>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * If <code>true</code>, the returned array will include punctuation
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * characters. Default is <code>false</code>.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * </dd>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass *
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <dt>includeWhitespace (Boolean)</dt>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <dd>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * If <code>true</code>, the returned array will include whitespace
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * characters. Default is <code>false</code>.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * </dd>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * </dl>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @return {Array} Array of words.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @static
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass */
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass getWords: function (string, options) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass var i = 0,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass map = WordBreak._classify(string),
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass len = map.length,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass word = [],
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass words = [],
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass chr,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass includePunctuation,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass includeWhitespace;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (!options) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass options = {};
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (options.ignoreCase) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass string = string.toLowerCase();
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass includePunctuation = options.includePunctuation;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass includeWhitespace = options.includeWhitespace;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // Loop through each character in the classification map and determine
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // whether it precedes a word boundary, building an array of distinct
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // words as we go.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass for (; i < len; ++i) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass chr = string.charAt(i);
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // Append this character to the current word.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass word.push(chr);
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // If there's a word boundary between the current character and the
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // next character, append the current word to the words array and
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // start building a new word.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (WordBreak._isWordBoundary(map, i)) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass word = word.join(EMPTY_STRING);
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (word &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass (includeWhitespace || !WHITESPACE.test(word)) &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass (includePunctuation || !PUNCTUATION.test(word))) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass words.push(word);
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass word = [];
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return words;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass },
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass /**
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * Returns an array containing only unique words from the specified string.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * For example, the string <code>'foo bar baz foo'</code> would result in
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * the array <code>['foo', 'bar', 'baz']</code>.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass *
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @method getUniqueWords
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @param {String} string String to split.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @param {Object} options (optional) Options (see <code>getWords()</code>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * for details).
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @return {Array} Array of unique words.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @static
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass */
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass getUniqueWords: function (string, options) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return Y.Array.unique(WordBreak.getWords(string, options));
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass },
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass /**
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <p>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * Returns <code>true</code> if there is a word boundary between the
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * specified character index and the next character index (or the end of the
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * string).
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * </p>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass *
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <p>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * Note that there are always word breaks at the beginning and end of a
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * string, so <code>isWordBoundary('', 0)</code> and
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <code>isWordBoundary('a', 0)</code> will both return <code>true</code>.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * </p>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass *
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @method isWordBoundary
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @param {String} string String to test.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @param {Number} index Character index to test within the string.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @return {Boolean} <code>true</code> for a word boundary,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <code>false</code> otherwise.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @static
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass */
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass isWordBoundary: function (string, index) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return WordBreak._isWordBoundary(WordBreak._classify(string), index);
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass },
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // -- Protected Static Methods ---------------------------------------------
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass /**
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * Returns a character classification map for the specified string.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass *
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @method _classify
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @param {String} string String to classify.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @return {Array} Classification map.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @protected
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @static
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass */
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass _classify: function (string) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass var chr,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass map = [],
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass i = 0,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass j,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass set,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass stringLength = string.length,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass setsLength = SETS.length,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass type;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass for (; i < stringLength; ++i) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass chr = string.charAt(i);
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass type = OTHER;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass for (j = 0; j < setsLength; ++j) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass set = SETS[j];
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (set && set.test(chr)) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass type = j;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass break;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass map.push(type);
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return map;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass },
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass /**
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <p>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * Returns <code>true</code> if there is a word boundary between the
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * specified character index and the next character index (or the end of the
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * string).
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * </p>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass *
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <p>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * Note that there are always word breaks at the beginning and end of a
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * string, so <code>_isWordBoundary('', 0)</code> and
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <code>_isWordBoundary('a', 0)</code> will both return <code>true</code>.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * </p>
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass *
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @method _isWordBoundary
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @param {Array} map Character classification map generated by
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * <code>_classify</code>.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @param {Number} index Character index to test.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @return {Boolean}
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @protected
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass * @static
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass */
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass _isWordBoundary: function (map, index) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass var prevType,
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass type = map[index],
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass nextType = map[index + 1],
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass nextNextType;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (index < 0 || (index > map.length - 1 && index !== 0)) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass Y.log('isWordBoundary: index out of bounds', 'warn', 'text-wordbreak');
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB5. Don't break between most letters.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (type === ALETTER && nextType === ALETTER) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass nextNextType = map[index + 2];
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB6. Don't break letters across certain punctuation.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (type === ALETTER &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass (nextType === MIDLETTER || nextType === MIDNUMLET) &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass nextNextType === ALETTER) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass prevType = map[index - 1];
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB7. Don't break letters across certain punctuation.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if ((type === MIDLETTER || type === MIDNUMLET) &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass nextType === ALETTER &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass prevType === ALETTER) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB8/WB9/WB10. Don't break inside sequences of digits or digits
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // adjacent to letters.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if ((type === NUMERIC || type === ALETTER) &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass (nextType === NUMERIC || nextType === ALETTER)) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB11. Don't break inside numeric sequences like "3.2" or
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // "3,456.789".
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if ((type === MIDNUM || type === MIDNUMLET) &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass nextType === NUMERIC &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass prevType === NUMERIC) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB12. Don't break inside numeric sequences like "3.2" or
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // "3,456.789".
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (type === NUMERIC &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass (nextType === MIDNUM || nextType === MIDNUMLET) &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass nextNextType === NUMERIC) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB4. Ignore format and extend characters.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (type === EXTEND || type === FORMAT ||
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass prevType === EXTEND || prevType === FORMAT ||
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass nextType === EXTEND || nextType === FORMAT) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB3. Don't break inside CRLF.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (type === CR && nextType === LF) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB3a. Break before newlines (including CR and LF).
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (type === NEWLINE || type === CR || type === LF) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return true;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB3b. Break after newlines (including CR and LF).
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (nextType === NEWLINE || nextType === CR || nextType === LF) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return true;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB13. Don't break between Katakana characters.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (type === KATAKANA && nextType === KATAKANA) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB13a. Don't break from extenders.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (nextType === EXTENDNUMLET &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass (type === ALETTER || type === NUMERIC || type === KATAKANA ||
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass type === EXTENDNUMLET)) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // WB13b. Don't break from extenders.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass if (type === EXTENDNUMLET &&
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass (nextType === ALETTER || nextType === NUMERIC ||
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass nextType === KATAKANA)) {
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return false;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass // Break after any character not covered by the rules above.
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass return true;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass }
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass};
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav GlassText.WordBreak = WordBreak;
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass
76ca635d61eb3f9fb7c9d788a44fa8b1690aa138Dav Glass}, '@VERSION@' ,{requires:['array-extras', 'text-data-wordbreak']});