unicode-wordbreak.js.html revision 7d9dd27c758ee750e3ea3b2cf2932691378d5cdd
<head>
</head>
<body id="yahoo-com">
<div id="doc3" class="yui-t2">
<div id="hd">
<h3>unicode <span class="subtitle">3.3.0</span></h3>
> unicode-wordbreak.js (source view)
<form onsubmit="return false">
<div id="propertysearch">
Search: <input autocomplete="off" id="searchinput" />
<div id="searchresults">
</div>
</div>
</form>
</div>
<div id="bd">
<div id="yui-main">
<div class="yui-b">
<form action="#" name="yui-classopts-form" method="get" id="yui-classopts-form">
<fieldset>
<legend>Filters</legend>
<span class="classopts"><input type="checkbox" name="show_private" id="show_private" /> <label for="show_private">Show Private</label></span>
<span class="classopts"><input type="checkbox" name="show_protected" id="show_protected" /> <label for="show_protected">Show Protected</label></span>
<span class="classopts"><input type="checkbox" name="show_deprecated" id="show_deprecated" /> <label for="show_deprecated">Show Deprecated</label></span>
</fieldset>
</form>
<div id="srcout">
<style>
#doc3 .classopts { display:none; }
</style>
<div class="highlight"><pre><span class="cm">/**</span>
<span class="cm"> * Provides utility methods for splitting strings on word breaks and determining</span>
<span class="cm"> * whether a character represents a word boundary, using the algorithm defined</span>
<span class="cm"> * in the Unicode Text Segmentation guidelines</span>
<span class="cm"> * (&lt;a href="http://unicode.org/reports/tr29/#Word_Boundaries"&gt;Unicode Standard</span>
<span class="cm"> * Annex #29&lt;/a&gt;).</span>
<span class="cm"> *</span>
<span class="cm"> * @module unicode</span>
<span class="cm"> * @submodule unicode-wordbreak</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="kd">var</span> <span class="nx">Unicode</span> <span class="o">=</span> <span class="nx">Y</span><span class="p">.</span><span class="nx">Unicode</span><span class="p">,</span>
<span class="nx">WBData</span> <span class="o">=</span> <span class="nx">Unicode</span><span class="p">.</span><span class="nx">Data</span><span class="p">.</span><span class="nx">WordBreak</span><span class="p">,</span>
<span class="c1">// Constants representing code point classifications.</span>
<span class="nx">ALETTER</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="nx">MIDNUMLET</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span>
<span class="nx">MIDLETTER</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span>
<span class="nx">MIDNUM</span> <span class="o">=</span> <span class="mi">3</span><span class="p">,</span>
<span class="nx">NUMERIC</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span>
<span class="nx">CR</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span>
<span class="nx">LF</span> <span class="o">=</span> <span class="mi">6</span><span class="p">,</span>
<span class="nx">NEWLINE</span> <span class="o">=</span> <span class="mi">7</span><span class="p">,</span>
<span class="nx">EXTEND</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span>
<span class="nx">FORMAT</span> <span class="o">=</span> <span class="mi">9</span><span class="p">,</span>
<span class="nx">KATAKANA</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span>
<span class="nx">EXTENDNUMLET</span> <span class="o">=</span> <span class="mi">11</span><span class="p">,</span>
<span class="nx">OTHER</span> <span class="o">=</span> <span class="mi">12</span><span class="p">,</span>
<span class="c1">// RegExp objects generated from code point data. Each regex matches a single</span>
<span class="c1">// character against a set of unicode code points. The index of each item in</span>
<span class="c1">// this array must match its corresponding code point constant value defined</span>
<span class="c1">// above.</span>
<span class="nx">SETS</span> <span class="o">=</span> <span class="p">[</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">aletter</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">midnumlet</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">midletter</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">midnum</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">numeric</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">cr</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">lf</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">newline</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">extend</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">format</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">katakana</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">extendnumlet</span><span class="p">)</span>
<span class="p">],</span>
<span class="nx">EMPTY_STRING</span> <span class="o">=</span> <span class="s1">''</span><span class="p">,</span>
<span class="nx">PUNCTUATION</span> <span class="o">=</span> <span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="s1">'^'</span> <span class="o">+</span> <span class="nx">WBData</span><span class="p">.</span><span class="nx">punctuation</span> <span class="o">+</span> <span class="s1">'$'</span><span class="p">),</span>
<span class="nx">WHITESPACE</span> <span class="o">=</span> <span class="sr">/\s/</span><span class="p">,</span>
<span class="nx">WordBreak</span> <span class="o">=</span> <span class="p">{</span>
<span class="c1">// -- Public Static Methods ------------------------------------------------</span>
<span class="cm">/**</span>
<span class="cm"> * Splits the specified string into an array of individual words.</span>
<span class="cm"> *</span>
<span class="cm"> * @method getWords</span>
<span class="cm"> * @param {String} string String to split.</span>
<span class="cm"> * @param {Object} options (optional) Options object containing zero or more</span>
<span class="cm"> * of the following properties:</span>
<span class="cm"> *</span>
<span class="cm"> * &lt;dl&gt;</span>
<span class="cm"> * &lt;dt&gt;ignoreCase (Boolean)&lt;/dt&gt;</span>
<span class="cm"> * &lt;dd&gt;</span>
<span class="cm"> * If &lt;code&gt;true&lt;/code&gt;, the string will be converted to lowercase</span>
<span class="cm"> * before being split. Default is &lt;code&gt;false&lt;/code&gt;.</span>
<span class="cm"> * &lt;/dd&gt;</span>
<span class="cm"> *</span>
<span class="cm"> * &lt;dt&gt;includePunctuation (Boolean)&lt;/dt&gt;</span>
<span class="cm"> * &lt;dd&gt;</span>
<span class="cm"> * If &lt;code&gt;true&lt;/code&gt;, the returned array will include punctuation</span>
<span class="cm"> * characters. Default is &lt;code&gt;false&lt;/code&gt;.</span>
<span class="cm"> * &lt;/dd&gt;</span>
<span class="cm"> *</span>
<span class="cm"> * &lt;dt&gt;includeWhitespace (Boolean)&lt;/dt&gt;</span>
<span class="cm"> * &lt;dd&gt;</span>
<span class="cm"> * If &lt;code&gt;true&lt;/code&gt;, the returned array will include whitespace</span>
<span class="cm"> * characters. Default is &lt;code&gt;false&lt;/code&gt;.</span>
<span class="cm"> * &lt;/dd&gt;</span>
<span class="cm"> * &lt;/dl&gt;</span>
<span class="cm"> * @return {Array} Array of words.</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="nx">getWords</span><span class="o">:</span> <span class="kd">function</span> <span class="p">(</span><span class="nx">string</span><span class="p">,</span> <span class="nx">options</span><span class="p">)</span> <span class="p">{</span>
<span class="kd">var</span> <span class="nx">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="nx">map</span> <span class="o">=</span> <span class="nx">WordBreak</span><span class="p">.</span><span class="nx">_classify</span><span class="p">(</span><span class="nx">string</span><span class="p">),</span>
<span class="nx">len</span> <span class="o">=</span> <span class="nx">map</span><span class="p">.</span><span class="nx">length</span><span class="p">,</span>
<span class="nx">word</span> <span class="o">=</span> <span class="p">[],</span>
<span class="nx">words</span> <span class="o">=</span> <span class="p">[],</span>
<span class="nx">chr</span><span class="p">,</span>
<span class="nx">includePunctuation</span><span class="p">,</span>
<span class="nx">includeWhitespace</span><span class="p">;</span>
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="nx">options</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">options</span> <span class="o">=</span> <span class="p">{};</span>
<span class="p">}</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">options</span><span class="p">.</span><span class="nx">ignoreCase</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">string</span> <span class="o">=</span> <span class="nx">string</span><span class="p">.</span><span class="nx">toLowerCase</span><span class="p">();</span>
<span class="p">}</span>
<span class="nx">includePunctuation</span> <span class="o">=</span> <span class="nx">options</span><span class="p">.</span><span class="nx">includePunctuation</span><span class="p">;</span>
<span class="nx">includeWhitespace</span> <span class="o">=</span> <span class="nx">options</span><span class="p">.</span><span class="nx">includeWhitespace</span><span class="p">;</span>
<span class="c1">// Loop through each character in the classification map and determine</span>
<span class="c1">// whether it precedes a word boundary, building an array of distinct</span>
<span class="c1">// words as we go.</span>
<span class="k">for</span> <span class="p">(;</span> <span class="nx">i</span> <span class="o">&lt;</span> <span class="nx">len</span><span class="p">;</span> <span class="o">++</span><span class="nx">i</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">chr</span> <span class="o">=</span> <span class="nx">string</span><span class="p">.</span><span class="nx">charAt</span><span class="p">(</span><span class="nx">i</span><span class="p">);</span>
<span class="c1">// Append this character to the current word.</span>
<span class="nx">word</span><span class="p">.</span><span class="nx">push</span><span class="p">(</span><span class="nx">chr</span><span class="p">);</span>
<span class="c1">// If there's a word boundary between the current character and the</span>
<span class="c1">// next character, append the current word to the words array and</span>
<span class="c1">// start building a new word. </span>
<span class="k">if</span> <span class="p">(</span><span class="nx">WordBreak</span><span class="p">.</span><span class="nx">_isWordBoundary</span><span class="p">(</span><span class="nx">map</span><span class="p">,</span> <span class="nx">i</span><span class="p">))</span> <span class="p">{</span>
<span class="nx">word</span> <span class="o">=</span> <span class="nx">word</span><span class="p">.</span><span class="nx">join</span><span class="p">(</span><span class="nx">EMPTY_STRING</span><span class="p">);</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">word</span> <span class="o">&&</span>
<span class="p">(</span><span class="nx">includeWhitespace</span> <span class="o">||</span> <span class="o">!</span><span class="nx">WHITESPACE</span><span class="p">.</span><span class="nx">test</span><span class="p">(</span><span class="nx">word</span><span class="p">))</span> <span class="o">&&</span>
<span class="p">(</span><span class="nx">includePunctuation</span> <span class="o">||</span> <span class="o">!</span><span class="nx">PUNCTUATION</span><span class="p">.</span><span class="nx">test</span><span class="p">(</span><span class="nx">word</span><span class="p">)))</span> <span class="p">{</span>
<span class="nx">words</span><span class="p">.</span><span class="nx">push</span><span class="p">(</span><span class="nx">word</span><span class="p">);</span>
<span class="p">}</span>
<span class="nx">word</span> <span class="o">=</span> <span class="p">[];</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="k">return</span> <span class="nx">words</span><span class="p">;</span>
<span class="p">},</span>
<span class="cm">/**</span>
<span class="cm"> * Returns an array containing only unique words from the specified string.</span>
<span class="cm"> * For example, the string <code>'foo bar baz foo'</code> would result in</span>
<span class="cm"> * the array <code>['foo', 'bar', 'baz']</code>.</span>
<span class="cm"> *</span>
<span class="cm"> * @method getUniqueWords</span>
<span class="cm"> * @param {String} string String to split.</span>
<span class="cm"> * @param {Object} options (optional) Options (see <code>getWords()</code></span>
<span class="cm"> * for details).</span>
<span class="cm"> * @return {Array} Array of unique words.</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="nx">getUniqueWords</span><span class="o">:</span> <span class="kd">function</span> <span class="p">(</span><span class="nx">string</span><span class="p">,</span> <span class="nx">options</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="nx">Y</span><span class="p">.</span><span class="nb">Array</span><span class="p">.</span><span class="nx">unique</span><span class="p">(</span><span class="nx">WordBreak</span><span class="p">.</span><span class="nx">getWords</span><span class="p">(</span><span class="nx">string</span><span class="p">,</span> <span class="nx">options</span><span class="p">));</span>
<span class="p">},</span>
<span class="cm">/**</span>
<span class="cm"> * Returns <code>true</code> if there is a word boundary after the specified</span>
<span class="cm"> * character index in the given string, <code>false</code> otherwise.</span>
<span class="cm"> *</span>
<span class="cm"> * @method isWordBoundary</span>
<span class="cm"> * @param {String} string String to test.</span>
<span class="cm"> * @param {Number} index Character index to test within the string.</span>
<span class="cm"> * @return {Boolean} <code>true</code> for a word boundary,</span>
<span class="cm"> * <code>false</code> otherwise.</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="nx">isWordBoundary</span><span class="o">:</span> <span class="kd">function</span> <span class="p">(</span><span class="nx">string</span><span class="p">,</span> <span class="nx">index</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="nx">WordBreak</span><span class="p">.</span><span class="nx">_isWordBoundary</span><span class="p">(</span><span class="nx">WordBreak</span><span class="p">.</span><span class="nx">_classify</span><span class="p">(</span><span class="nx">string</span><span class="p">),</span> <span class="nx">index</span><span class="p">);</span>
<span class="p">},</span>
<span class="c1">// -- Protected Static Methods ---------------------------------------------</span>
<span class="cm">/**</span>
<span class="cm"> * Returns a character classification map for the specified string.</span>
<span class="cm"> *</span>
<span class="cm"> * @method _classify</span>
<span class="cm"> * @param {String} string String to classify.</span>
<span class="cm"> * @return {Array} Classification map.</span>
<span class="cm"> * @protected</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="nx">_classify</span><span class="o">:</span> <span class="kd">function</span> <span class="p">(</span><span class="nx">string</span><span class="p">)</span> <span class="p">{</span>
<span class="kd">var</span> <span class="nx">chr</span><span class="p">,</span>
<span class="nx">map</span> <span class="o">=</span> <span class="p">[],</span>
<span class="nx">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="nx">j</span><span class="p">,</span>
<span class="nx">set</span><span class="p">,</span>
<span class="nx">stringLength</span> <span class="o">=</span> <span class="nx">string</span><span class="p">.</span><span class="nx">length</span><span class="p">,</span>
<span class="nx">setsLength</span> <span class="o">=</span> <span class="nx">SETS</span><span class="p">.</span><span class="nx">length</span><span class="p">,</span>
<span class="nx">type</span><span class="p">;</span>
<span class="k">for</span> <span class="p">(;</span> <span class="nx">i</span> <span class="o">&lt;</span> <span class="nx">stringLength</span><span class="p">;</span> <span class="o">++</span><span class="nx">i</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">chr</span> <span class="o">=</span> <span class="nx">string</span><span class="p">.</span><span class="nx">charAt</span><span class="p">(</span><span class="nx">i</span><span class="p">);</span>
<span class="nx">type</span> <span class="o">=</span> <span class="nx">OTHER</span><span class="p">;</span>
<span class="k">for</span> <span class="p">(</span><span class="nx">j</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="nx">j</span> <span class="o">&lt;</span> <span class="nx">setsLength</span><span class="p">;</span> <span class="o">++</span><span class="nx">j</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">set</span> <span class="o">=</span> <span class="nx">SETS</span><span class="p">[</span><span class="nx">j</span><span class="p">];</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">set</span> <span class="o">&&</span> <span class="nx">set</span><span class="p">.</span><span class="nx">test</span><span class="p">(</span><span class="nx">chr</span><span class="p">))</span> <span class="p">{</span>
<span class="nx">type</span> <span class="o">=</span> <span class="nx">j</span><span class="p">;</span>
<span class="k">break</span><span class="p">;</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="nx">map</span><span class="p">.</span><span class="nx">push</span><span class="p">(</span><span class="nx">type</span><span class="p">);</span>
<span class="p">}</span>
<span class="k">return</span> <span class="nx">map</span><span class="p">;</span>
<span class="p">},</span>
<span class="cm">/**</span>
<span class="cm"> * Returns <code>true</code> if there is a word boundary after the specified</span>
<span class="cm"> * character index, <code>false</code> otherwise.</span>
<span class="cm"> *</span>
<span class="cm"> * @method _isWordBoundary</span>
<span class="cm"> * @param {Array} map Character classification map generated by</span>
<span class="cm"> * <code>_classify</code>.</span>
<span class="cm"> * @param {Number} index Character index to test.</span>
<span class="cm"> * @return {Boolean}</span>
<span class="cm"> * @protected</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="nx">_isWordBoundary</span><span class="o">:</span> <span class="kd">function</span> <span class="p">(</span><span class="nx">map</span><span class="p">,</span> <span class="nx">index</span><span class="p">)</span> <span class="p">{</span>
<span class="kd">var</span> <span class="nx">prevType</span><span class="p">,</span>
<span class="nx">type</span> <span class="o">=</span> <span class="nx">map</span><span class="p">[</span><span class="nx">index</span><span class="p">],</span>
<span class="nx">nextType</span> <span class="o">=</span> <span class="nx">map</span><span class="p">[</span><span class="nx">index</span> <span class="o">+</span> <span class="mi">1</span><span class="p">],</span>
<span class="nx">nextNextType</span><span class="p">;</span>
<span class="c1">// WB5. Don't break between most letters.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">ALETTER</span> <span class="o">&&</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">ALETTER</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="nx">nextNextType</span> <span class="o">=</span> <span class="nx">map</span><span class="p">[</span><span class="nx">index</span> <span class="o">+</span> <span class="mi">2</span><span class="p">];</span>
<span class="c1">// WB6. Don't break letters across certain punctuation.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">ALETTER</span> <span class="o">&&</span>
<span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">MIDLETTER</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">MIDNUMLET</span><span class="p">)</span> <span class="o">&&</span>
<span class="nx">nextNextType</span> <span class="o">===</span> <span class="nx">ALETTER</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="nx">prevType</span> <span class="o">=</span> <span class="nx">map</span><span class="p">[</span><span class="nx">index</span> <span class="o">-</span> <span class="mi">1</span><span class="p">];</span>
<span class="c1">// WB7. Don't break letters across certain punctuation.</span>
<span class="k">if</span> <span class="p">((</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">MIDLETTER</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">MIDNUMLET</span><span class="p">)</span> <span class="o">&&</span>
<span class="nx">nextType</span> <span class="o">===</span> <span class="nx">ALETTER</span> <span class="o">&&</span>
<span class="nx">prevType</span> <span class="o">===</span> <span class="nx">ALETTER</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB8/WB9/WB10. Don't break inside sequences of digits or digits</span>
<span class="c1">// adjacent to letters.</span>
<span class="k">if</span> <span class="p">((</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">ALETTER</span><span class="p">)</span> <span class="o">&&</span>
<span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">ALETTER</span><span class="p">))</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB11. Don't break inside numeric sequences like "3.2" or</span>
<span class="c1">// "3,456.789".</span>
<span class="k">if</span> <span class="p">((</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">MIDNUM</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">MIDNUMLET</span><span class="p">)</span> <span class="o">&&</span>
<span class="nx">nextType</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">&&</span>
<span class="nx">prevType</span> <span class="o">===</span> <span class="nx">NUMERIC</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB12. Don't break inside numeric sequences like "3.2" or</span>
<span class="c1">// "3,456.789".</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">&&</span>
<span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">MIDNUM</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">MIDNUMLET</span><span class="p">)</span> <span class="o">&&</span>
<span class="nx">nextNextType</span> <span class="o">===</span> <span class="nx">NUMERIC</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB4. Ignore format and extend characters.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">EXTEND</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">FORMAT</span> <span class="o">||</span>
<span class="nx">prevType</span> <span class="o">===</span> <span class="nx">EXTEND</span> <span class="o">||</span> <span class="nx">prevType</span> <span class="o">===</span> <span class="nx">FORMAT</span> <span class="o">||</span>
<span class="nx">nextType</span> <span class="o">===</span> <span class="nx">EXTEND</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">FORMAT</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB3. Don't break inside CRLF.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">CR</span> <span class="o">&&</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">LF</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB3a. Break after newlines (including CR and LF).</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">NEWLINE</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">CR</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">LF</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">true</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB3b. Break before newlines (including CR and LF).</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">NEWLINE</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">CR</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">LF</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">true</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB13. Don't break between Katakana characters.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">KATAKANA</span> <span class="o">&&</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">KATAKANA</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB13a. Don't break from extenders.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">EXTENDNUMLET</span> <span class="o">&&</span>
<span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">ALETTER</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">KATAKANA</span> <span class="o">||</span>
<span class="nx">type</span> <span class="o">===</span> <span class="nx">EXTENDNUMLET</span><span class="p">))</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB13b. Don't break from extenders.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">EXTENDNUMLET</span> <span class="o">&&</span>
<span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">ALETTER</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">||</span>
<span class="nx">nextType</span> <span class="o">===</span> <span class="nx">KATAKANA</span><span class="p">))</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// Break after any character not covered by the rules above.</span>
<span class="k">return</span> <span class="kc">true</span><span class="p">;</span>
<span class="p">}</span>
<span class="p">};</span>
<span class="nx">Unicode</span><span class="p">.</span><span class="nx">WordBreak</span> <span class="o">=</span> <span class="nx">WordBreak</span><span class="p">;</span>
</pre></div>
</div>
</div>
</div>
<div class="yui-b">
<div class="nav">
<div id="moduleList" class="module">
<h4>Modules</h4>
<ul class="content">
<li class=""><a href="module_classnamemanager.html" title="classnamemanager">classnamemanager</a></li>
<li class=""><a href="module_event-valuechange.html" title="event-valuechange">event-valuechange</a></li>
<li class=""><a href="module_history-deprecated.html" title="history-deprecated">history-deprecated</a></li>
<li class=""><a href="module_node-focusmanager.html" title="node-focusmanager">node-focusmanager</a></li>
<li class=""><a href="module_scrollview-paginator.html" title="scrollview-paginator">scrollview-paginator</a></li>
<li class=""><a href="module_scrollview-scrollbars.html" title="scrollview-scrollbars">scrollview-scrollbars</a></li>
<li class=""><a href="module_widget-position-align.html" title="widget-position-align">widget-position-align</a></li>
<li class=""><a href="module_widget-position-constrain.html" title="widget-position-constrain">widget-position-constrain</a></li>
</ul>
</div>
<div id="classList" class="module">
<h4>Classes</h4>
<ul class="content">
<li class=""><a href="Unicode.AccentFold.html" title="Unicode.AccentFold">Unicode.AccentFold</a></li>
<li class=""><a href="Unicode.Data.AccentFold.html" title="Unicode.Data.AccentFold">Unicode.Data.AccentFold</a></li>
<li class=""><a href="Unicode.Data.WordBreak.html" title="Unicode.Data.WordBreak">Unicode.Data.WordBreak</a></li>
</ul>
</div>
<div id="fileList" class="module">
<h4>Files</h4>
<ul class="content">
<li class=""><a href="unicode-accentfold.js.html" title="unicode-accentfold.js">unicode-accentfold.js</a></li>
<li class=""><a href="unicode-data-accentfold.js.html" title="unicode-data-accentfold.js">unicode-data-accentfold.js</a></li>
<li class=""><a href="unicode-data-wordbreak.js.html" title="unicode-data-wordbreak.js">unicode-data-wordbreak.js</a></li>
<li class="selected"><a href="unicode-wordbreak.js.html" title="unicode-wordbreak.js">unicode-wordbreak.js</a></li>
</ul>
</div>
</div>
</div>
</div>
<div id="ft">
<hr />
Copyright © 2010 Yahoo! Inc. All rights reserved.
</div>
</div>
<script type="text/javascript">
// Index of the API members listed for this module's documentation. Each entry
// records the member's access level ("" or "protected"), host class, name,
// documentation URL, and type (every entry here is a "method").
// NOTE(review): presumably consumed by the property-search box (#searchinput)
// via a script that is not part of this page's inline code — confirm.
var ALL_YUI_PROPS = [{"access": "", "host": "Unicode.AccentFold", "name": "canFold", "url": "Unicode.AccentFold.html#method_canFold", "type": "method"}, {"access": "protected", "host": "Unicode.WordBreak", "name": "_classify", "url": "Unicode.WordBreak.html#method__classify", "type": "method"}, {"access": "", "host": "Unicode.AccentFold", "name": "compare", "url": "Unicode.AccentFold.html#method_compare", "type": "method"}, {"access": "", "host": "Unicode.AccentFold", "name": "filter", "url": "Unicode.AccentFold.html#method_filter", "type": "method"}, {"access": "", "host": "Unicode.AccentFold", "name": "fold", "url": "Unicode.AccentFold.html#method_fold", "type": "method"}, {"access": "", "host": "Unicode.WordBreak", "name": "getUniqueWords", "url": "Unicode.WordBreak.html#method_getUniqueWords", "type": "method"}, {"access": "", "host": "Unicode.WordBreak", "name": "getWords", "url": "Unicode.WordBreak.html#method_getWords", "type": "method"}, {"access": "protected", "host": "Unicode.WordBreak", "name": "_isWordBoundary", "url": "Unicode.WordBreak.html#method__isWordBoundary", "type": "method"}, {"access": "", "host": "Unicode.WordBreak", "name": "isWordBoundary", "url": "Unicode.WordBreak.html#method_isWordBoundary", "type": "method"}];
</script>
</body>
</html>