unicode-wordbreak.js.html revision 7d9dd27c758ee750e3ea3b2cf2932691378d5cdd
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html xmlns:yui="http://yuilibrary.com/rdf/1.0/yui.rdf#">
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
<title>API: unicode unicode-wordbreak.js (YUI Library)</title>
<link rel="stylesheet" type="text/css" href="assets/reset-fonts-grids-min.css" />
<link rel="stylesheet" type="text/css" href="assets/api.css" />
<script type="text/javascript" src="assets/api-js"></script>
<script type="text/javascript" src="assets/ac-js"></script>
</head>
<body id="yahoo-com">
<div id="doc3" class="yui-t2">
<div id="hd">
<h1><a href="http://developer.yahoo.com/yui/" title="Yahoo! UI Library">Yahoo! UI Library</a></h1>
<h3>unicode&nbsp; <span class="subtitle">3.3.0</span></h3>
<a href="/index.html" title="Yahoo! UI Library">Yahoo! UI Library</a>
&gt; <a href="/module_unicode.html" title="unicode">unicode</a>
&gt; unicode-wordbreak.js (source view)
<form onsubmit="return false">
<div id="propertysearch">
Search: <input autocomplete="off" id="searchinput" />
<div id="searchresults">
&nbsp;
</div>
</div>
</form>
</div>
<div id="bd">
<div id="yui-main">
<div class="yui-b">
<form action="#" name="yui-classopts-form" method="get" id="yui-classopts-form">
<fieldset>
<legend>Filters</legend>
<span class="classopts"><input type="checkbox" name="show_private" id="show_private" /> <label for="show_private">Show Private</label></span>
<span class="classopts"><input type="checkbox" name="show_protected" id="show_protected" /> <label for="show_protected">Show Protected</label></span>
<span class="classopts"><input type="checkbox" name="show_deprecated" id="show_deprecated" /> <label for="show_deprecated">Show Deprecated</label></span>
</fieldset>
</form>
<div id="srcout">
<style>
#doc3 .classopts { display:none; }
</style>
<div class="highlight"><pre><span class="cm">/**</span>
<span class="cm"> * Provides utility methods for splitting strings on word breaks and determining</span>
<span class="cm"> * whether a character represents a word boundary, using the algorithm defined</span>
<span class="cm"> * in the Unicode Text Segmentation guidelines</span>
<span class="cm"> * (&lt;a href=&quot;http://unicode.org/reports/tr29/#Word_Boundaries&quot;&gt;Unicode Standard</span>
<span class="cm"> * Annex #29&lt;/a&gt;).</span>
<span class="cm"> *</span>
<span class="cm"> * @module unicode</span>
<span class="cm"> * @submodule unicode-wordbreak</span>
<span class="cm"> * @class Unicode.WordBreak</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="kd">var</span> <span class="nx">Unicode</span> <span class="o">=</span> <span class="nx">Y</span><span class="p">.</span><span class="nx">Unicode</span><span class="p">,</span>
<span class="nx">WBData</span> <span class="o">=</span> <span class="nx">Unicode</span><span class="p">.</span><span class="nx">Data</span><span class="p">.</span><span class="nx">WordBreak</span><span class="p">,</span>
<span class="c1">// Constants representing code point classifications.</span>
<span class="nx">ALETTER</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="nx">MIDNUMLET</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span>
<span class="nx">MIDLETTER</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span>
<span class="nx">MIDNUM</span> <span class="o">=</span> <span class="mi">3</span><span class="p">,</span>
<span class="nx">NUMERIC</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span>
<span class="nx">CR</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span>
<span class="nx">LF</span> <span class="o">=</span> <span class="mi">6</span><span class="p">,</span>
<span class="nx">NEWLINE</span> <span class="o">=</span> <span class="mi">7</span><span class="p">,</span>
<span class="nx">EXTEND</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span>
<span class="nx">FORMAT</span> <span class="o">=</span> <span class="mi">9</span><span class="p">,</span>
<span class="nx">KATAKANA</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span>
<span class="nx">EXTENDNUMLET</span> <span class="o">=</span> <span class="mi">11</span><span class="p">,</span>
<span class="nx">OTHER</span> <span class="o">=</span> <span class="mi">12</span><span class="p">,</span>
<span class="c1">// RegExp objects generated from code point data. Each regex matches a single</span>
<span class="c1">// character against a set of Unicode code points. The index of each item in</span>
<span class="c1">// this array must match its corresponding code point constant value defined</span>
<span class="c1">// above.</span>
<span class="nx">SETS</span> <span class="o">=</span> <span class="p">[</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">aletter</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">midnumlet</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">midletter</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">midnum</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">numeric</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">cr</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">lf</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">newline</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">extend</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">format</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">katakana</span><span class="p">),</span>
<span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="nx">WBData</span><span class="p">.</span><span class="nx">extendnumlet</span><span class="p">)</span>
<span class="p">],</span>
<span class="nx">EMPTY_STRING</span> <span class="o">=</span> <span class="s1">&#39;&#39;</span><span class="p">,</span>
<span class="nx">PUNCTUATION</span> <span class="o">=</span> <span class="k">new</span> <span class="nb">RegExp</span><span class="p">(</span><span class="s1">&#39;^&#39;</span> <span class="o">+</span> <span class="nx">WBData</span><span class="p">.</span><span class="nx">punctuation</span> <span class="o">+</span> <span class="s1">&#39;$&#39;</span><span class="p">),</span>
<span class="nx">WHITESPACE</span> <span class="o">=</span> <span class="sr">/\s/</span><span class="p">,</span>
<span class="nx">WordBreak</span> <span class="o">=</span> <span class="p">{</span>
<span class="c1">// -- Public Static Methods ------------------------------------------------</span>
<span class="cm">/**</span>
<span class="cm"> * Splits the specified string into an array of individual words.</span>
<span class="cm"> *</span>
<span class="cm"> * @method getWords</span>
<span class="cm"> * @param {String} string String to split.</span>
<span class="cm"> * @param {Object} options (optional) Options object containing zero or more</span>
<span class="cm"> * of the following properties:</span>
<span class="cm"> *</span>
<span class="cm"> * &lt;dl&gt;</span>
<span class="cm"> * &lt;dt&gt;ignoreCase (Boolean)&lt;/dt&gt;</span>
<span class="cm"> * &lt;dd&gt;</span>
<span class="cm"> * If &lt;code&gt;true&lt;/code&gt;, the string will be converted to lowercase</span>
<span class="cm"> * before being split. Default is &lt;code&gt;false&lt;/code&gt;.</span>
<span class="cm"> * &lt;/dd&gt;</span>
<span class="cm"> *</span>
<span class="cm"> * &lt;dt&gt;includePunctuation (Boolean)&lt;/dt&gt;</span>
<span class="cm"> * &lt;dd&gt;</span>
<span class="cm"> * If &lt;code&gt;true&lt;/code&gt;, the returned array will include punctuation</span>
<span class="cm"> * characters. Default is &lt;code&gt;false&lt;/code&gt;.</span>
<span class="cm"> * &lt;/dd&gt;</span>
<span class="cm"> *</span>
<span class="cm"> * &lt;dt&gt;includeWhitespace (Boolean)&lt;/dt&gt;</span>
<span class="cm"> * &lt;dd&gt;</span>
<span class="cm"> * If &lt;code&gt;true&lt;/code&gt;, the returned array will include whitespace</span>
<span class="cm"> * characters. Default is &lt;code&gt;false&lt;/code&gt;.</span>
<span class="cm"> * &lt;/dd&gt;</span>
<span class="cm"> * &lt;/dl&gt;</span>
<span class="cm"> * @return {Array} Array of words.</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="nx">getWords</span><span class="o">:</span> <span class="kd">function</span> <span class="p">(</span><span class="nx">string</span><span class="p">,</span> <span class="nx">options</span><span class="p">)</span> <span class="p">{</span>
<span class="kd">var</span> <span class="nx">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="nx">map</span> <span class="o">=</span> <span class="nx">WordBreak</span><span class="p">.</span><span class="nx">_classify</span><span class="p">(</span><span class="nx">string</span><span class="p">),</span>
<span class="nx">len</span> <span class="o">=</span> <span class="nx">map</span><span class="p">.</span><span class="nx">length</span><span class="p">,</span>
<span class="nx">word</span> <span class="o">=</span> <span class="p">[],</span>
<span class="nx">words</span> <span class="o">=</span> <span class="p">[],</span>
<span class="nx">chr</span><span class="p">,</span>
<span class="nx">includePunctuation</span><span class="p">,</span>
<span class="nx">includeWhitespace</span><span class="p">;</span>
<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="nx">options</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">options</span> <span class="o">=</span> <span class="p">{};</span>
<span class="p">}</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">options</span><span class="p">.</span><span class="nx">ignoreCase</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">string</span> <span class="o">=</span> <span class="nx">string</span><span class="p">.</span><span class="nx">toLowerCase</span><span class="p">();</span>
<span class="p">}</span>
<span class="nx">includePunctuation</span> <span class="o">=</span> <span class="nx">options</span><span class="p">.</span><span class="nx">includePunctuation</span><span class="p">;</span>
<span class="nx">includeWhitespace</span> <span class="o">=</span> <span class="nx">options</span><span class="p">.</span><span class="nx">includeWhitespace</span><span class="p">;</span>
<span class="c1">// Loop through each character in the classification map and determine</span>
<span class="c1">// whether it precedes a word boundary, building an array of distinct</span>
<span class="c1">// words as we go.</span>
<span class="k">for</span> <span class="p">(;</span> <span class="nx">i</span> <span class="o">&lt;</span> <span class="nx">len</span><span class="p">;</span> <span class="o">++</span><span class="nx">i</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">chr</span> <span class="o">=</span> <span class="nx">string</span><span class="p">.</span><span class="nx">charAt</span><span class="p">(</span><span class="nx">i</span><span class="p">);</span>
<span class="c1">// Append this character to the current word.</span>
<span class="nx">word</span><span class="p">.</span><span class="nx">push</span><span class="p">(</span><span class="nx">chr</span><span class="p">);</span>
<span class="c1">// If there&#39;s a word boundary between the current character and the</span>
<span class="c1">// next character, append the current word to the words array and</span>
<span class="c1">// start building a new word. </span>
<span class="k">if</span> <span class="p">(</span><span class="nx">WordBreak</span><span class="p">.</span><span class="nx">_isWordBoundary</span><span class="p">(</span><span class="nx">map</span><span class="p">,</span> <span class="nx">i</span><span class="p">))</span> <span class="p">{</span>
<span class="nx">word</span> <span class="o">=</span> <span class="nx">word</span><span class="p">.</span><span class="nx">join</span><span class="p">(</span><span class="nx">EMPTY_STRING</span><span class="p">);</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">word</span> <span class="o">&amp;&amp;</span>
<span class="p">(</span><span class="nx">includeWhitespace</span> <span class="o">||</span> <span class="o">!</span><span class="nx">WHITESPACE</span><span class="p">.</span><span class="nx">test</span><span class="p">(</span><span class="nx">word</span><span class="p">))</span> <span class="o">&amp;&amp;</span>
<span class="p">(</span><span class="nx">includePunctuation</span> <span class="o">||</span> <span class="o">!</span><span class="nx">PUNCTUATION</span><span class="p">.</span><span class="nx">test</span><span class="p">(</span><span class="nx">word</span><span class="p">)))</span> <span class="p">{</span>
<span class="nx">words</span><span class="p">.</span><span class="nx">push</span><span class="p">(</span><span class="nx">word</span><span class="p">);</span>
<span class="p">}</span>
<span class="nx">word</span> <span class="o">=</span> <span class="p">[];</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="k">return</span> <span class="nx">words</span><span class="p">;</span>
<span class="p">},</span>
<span class="cm">/**</span>
<span class="cm"> * Returns an array containing only unique words from the specified string.</span>
<span class="cm"> * For example, the string &lt;code&gt;&#39;foo bar baz foo&#39;&lt;/code&gt; would result in</span>
<span class="cm"> * the array &lt;code&gt;[&#39;foo&#39;, &#39;bar&#39;, &#39;baz&#39;]&lt;/code&gt;.</span>
<span class="cm"> *</span>
<span class="cm"> * @method getUniqueWords</span>
<span class="cm"> * @param {String} string String to split.</span>
<span class="cm"> * @param {Object} options (optional) Options (see &lt;code&gt;getWords()&lt;/code&gt;</span>
<span class="cm"> * for details).</span>
<span class="cm"> * @return {Array} Array of unique words.</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="nx">getUniqueWords</span><span class="o">:</span> <span class="kd">function</span> <span class="p">(</span><span class="nx">string</span><span class="p">,</span> <span class="nx">options</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="nx">Y</span><span class="p">.</span><span class="nb">Array</span><span class="p">.</span><span class="nx">unique</span><span class="p">(</span><span class="nx">WordBreak</span><span class="p">.</span><span class="nx">getWords</span><span class="p">(</span><span class="nx">string</span><span class="p">,</span> <span class="nx">options</span><span class="p">));</span>
<span class="p">},</span>
<span class="cm">/**</span>
<span class="cm"> * Returns &lt;code&gt;true&lt;/code&gt; if there is a word boundary after the specified</span>
<span class="cm"> * character index in the given string, &lt;code&gt;false&lt;/code&gt; otherwise.</span>
<span class="cm"> *</span>
<span class="cm"> * @method isWordBoundary</span>
<span class="cm"> * @param {String} string String to test.</span>
<span class="cm"> * @param {Number} index Character index to test within the string.</span>
<span class="cm"> * @return {Boolean} &lt;code&gt;true&lt;/code&gt; for a word boundary,</span>
<span class="cm"> * &lt;code&gt;false&lt;/code&gt; otherwise.</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="nx">isWordBoundary</span><span class="o">:</span> <span class="kd">function</span> <span class="p">(</span><span class="nx">string</span><span class="p">,</span> <span class="nx">index</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="nx">WordBreak</span><span class="p">.</span><span class="nx">_isWordBoundary</span><span class="p">(</span><span class="nx">WordBreak</span><span class="p">.</span><span class="nx">_classify</span><span class="p">(</span><span class="nx">string</span><span class="p">),</span> <span class="nx">index</span><span class="p">);</span>
<span class="p">},</span>
<span class="c1">// -- Protected Static Methods ---------------------------------------------</span>
<span class="cm">/**</span>
<span class="cm"> * Returns a character classification map for the specified string.</span>
<span class="cm"> *</span>
<span class="cm"> * @method _classify</span>
<span class="cm"> * @param {String} string String to classify.</span>
<span class="cm"> * @return {Array} Classification map.</span>
<span class="cm"> * @protected</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="nx">_classify</span><span class="o">:</span> <span class="kd">function</span> <span class="p">(</span><span class="nx">string</span><span class="p">)</span> <span class="p">{</span>
<span class="kd">var</span> <span class="nx">chr</span><span class="p">,</span>
<span class="nx">map</span> <span class="o">=</span> <span class="p">[],</span>
<span class="nx">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="nx">j</span><span class="p">,</span>
<span class="nx">set</span><span class="p">,</span>
<span class="nx">stringLength</span> <span class="o">=</span> <span class="nx">string</span><span class="p">.</span><span class="nx">length</span><span class="p">,</span>
<span class="nx">setsLength</span> <span class="o">=</span> <span class="nx">SETS</span><span class="p">.</span><span class="nx">length</span><span class="p">,</span>
<span class="nx">type</span><span class="p">;</span>
<span class="k">for</span> <span class="p">(;</span> <span class="nx">i</span> <span class="o">&lt;</span> <span class="nx">stringLength</span><span class="p">;</span> <span class="o">++</span><span class="nx">i</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">chr</span> <span class="o">=</span> <span class="nx">string</span><span class="p">.</span><span class="nx">charAt</span><span class="p">(</span><span class="nx">i</span><span class="p">);</span>
<span class="nx">type</span> <span class="o">=</span> <span class="nx">OTHER</span><span class="p">;</span>
<span class="k">for</span> <span class="p">(</span><span class="nx">j</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="nx">j</span> <span class="o">&lt;</span> <span class="nx">setsLength</span><span class="p">;</span> <span class="o">++</span><span class="nx">j</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">set</span> <span class="o">=</span> <span class="nx">SETS</span><span class="p">[</span><span class="nx">j</span><span class="p">];</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">set</span> <span class="o">&amp;&amp;</span> <span class="nx">set</span><span class="p">.</span><span class="nx">test</span><span class="p">(</span><span class="nx">chr</span><span class="p">))</span> <span class="p">{</span>
<span class="nx">type</span> <span class="o">=</span> <span class="nx">j</span><span class="p">;</span>
<span class="k">break</span><span class="p">;</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="nx">map</span><span class="p">.</span><span class="nx">push</span><span class="p">(</span><span class="nx">type</span><span class="p">);</span>
<span class="p">}</span>
<span class="k">return</span> <span class="nx">map</span><span class="p">;</span>
<span class="p">},</span>
<span class="cm">/**</span>
<span class="cm"> * Returns &lt;code&gt;true&lt;/code&gt; if there is a word boundary after the specified</span>
<span class="cm"> * character index, &lt;code&gt;false&lt;/code&gt; otherwise.</span>
<span class="cm"> *</span>
<span class="cm"> * @method _isWordBoundary</span>
<span class="cm"> * @param {Array} map Character classification map generated by</span>
<span class="cm"> * &lt;code&gt;_classify&lt;/code&gt;.</span>
<span class="cm"> * @param {Number} index Character index to test.</span>
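<span class="cm"> * @return {Boolean} &lt;code&gt;true&lt;/code&gt; for a word boundary,</span>
<span class="cm"> * &lt;code&gt;false&lt;/code&gt; otherwise.</span>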
<span class="cm"> * @protected</span>
<span class="cm"> * @static</span>
<span class="cm"> */</span>
<span class="nx">_isWordBoundary</span><span class="o">:</span> <span class="kd">function</span> <span class="p">(</span><span class="nx">map</span><span class="p">,</span> <span class="nx">index</span><span class="p">)</span> <span class="p">{</span>
<span class="kd">var</span> <span class="nx">prevType</span><span class="p">,</span>
<span class="nx">type</span> <span class="o">=</span> <span class="nx">map</span><span class="p">[</span><span class="nx">index</span><span class="p">],</span>
<span class="nx">nextType</span> <span class="o">=</span> <span class="nx">map</span><span class="p">[</span><span class="nx">index</span> <span class="o">+</span> <span class="mi">1</span><span class="p">],</span>
<span class="nx">nextNextType</span><span class="p">;</span>
<span class="c1">// WB5. Don&#39;t break between most letters.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">ALETTER</span> <span class="o">&amp;&amp;</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">ALETTER</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="nx">nextNextType</span> <span class="o">=</span> <span class="nx">map</span><span class="p">[</span><span class="nx">index</span> <span class="o">+</span> <span class="mi">2</span><span class="p">];</span>
<span class="c1">// WB6. Don&#39;t break letters across certain punctuation.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">ALETTER</span> <span class="o">&amp;&amp;</span>
<span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">MIDLETTER</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">MIDNUMLET</span><span class="p">)</span> <span class="o">&amp;&amp;</span>
<span class="nx">nextNextType</span> <span class="o">===</span> <span class="nx">ALETTER</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="nx">prevType</span> <span class="o">=</span> <span class="nx">map</span><span class="p">[</span><span class="nx">index</span> <span class="o">-</span> <span class="mi">1</span><span class="p">];</span>
<span class="c1">// WB7. Don&#39;t break letters across certain punctuation.</span>
<span class="k">if</span> <span class="p">((</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">MIDLETTER</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">MIDNUMLET</span><span class="p">)</span> <span class="o">&amp;&amp;</span>
<span class="nx">nextType</span> <span class="o">===</span> <span class="nx">ALETTER</span> <span class="o">&amp;&amp;</span>
<span class="nx">prevType</span> <span class="o">===</span> <span class="nx">ALETTER</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB8/WB9/WB10. Don&#39;t break inside sequences of digits or digits</span>
<span class="c1">// adjacent to letters.</span>
<span class="k">if</span> <span class="p">((</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">ALETTER</span><span class="p">)</span> <span class="o">&amp;&amp;</span>
<span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">ALETTER</span><span class="p">))</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB11. Don&#39;t break inside numeric sequences like &quot;3.2&quot; or</span>
<span class="c1">// &quot;3,456.789&quot;.</span>
<span class="k">if</span> <span class="p">((</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">MIDNUM</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">MIDNUMLET</span><span class="p">)</span> <span class="o">&amp;&amp;</span>
<span class="nx">nextType</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">&amp;&amp;</span>
<span class="nx">prevType</span> <span class="o">===</span> <span class="nx">NUMERIC</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB12. Don&#39;t break inside numeric sequences like &quot;3.2&quot; or</span>
<span class="c1">// &quot;3,456.789&quot;.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">&amp;&amp;</span>
<span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">MIDNUM</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">MIDNUMLET</span><span class="p">)</span> <span class="o">&amp;&amp;</span>
<span class="nx">nextNextType</span> <span class="o">===</span> <span class="nx">NUMERIC</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB4. Ignore format and extend characters.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">EXTEND</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">FORMAT</span> <span class="o">||</span>
<span class="nx">prevType</span> <span class="o">===</span> <span class="nx">EXTEND</span> <span class="o">||</span> <span class="nx">prevType</span> <span class="o">===</span> <span class="nx">FORMAT</span> <span class="o">||</span>
<span class="nx">nextType</span> <span class="o">===</span> <span class="nx">EXTEND</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">FORMAT</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB3. Don&#39;t break inside CRLF.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">CR</span> <span class="o">&amp;&amp;</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">LF</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB3a. Break after newlines (including CR and LF).</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">NEWLINE</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">CR</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">LF</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">true</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB3b. Break before newlines (including CR and LF).</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">NEWLINE</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">CR</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">LF</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">true</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB13. Don&#39;t break between Katakana characters.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">KATAKANA</span> <span class="o">&amp;&amp;</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">KATAKANA</span><span class="p">)</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB13a. Don&#39;t break from extenders.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">EXTENDNUMLET</span> <span class="o">&amp;&amp;</span>
<span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">ALETTER</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">||</span> <span class="nx">type</span> <span class="o">===</span> <span class="nx">KATAKANA</span> <span class="o">||</span>
<span class="nx">type</span> <span class="o">===</span> <span class="nx">EXTENDNUMLET</span><span class="p">))</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// WB13b. Don&#39;t break from extenders.</span>
<span class="k">if</span> <span class="p">(</span><span class="nx">type</span> <span class="o">===</span> <span class="nx">EXTENDNUMLET</span> <span class="o">&amp;&amp;</span>
<span class="p">(</span><span class="nx">nextType</span> <span class="o">===</span> <span class="nx">ALETTER</span> <span class="o">||</span> <span class="nx">nextType</span> <span class="o">===</span> <span class="nx">NUMERIC</span> <span class="o">||</span>
<span class="nx">nextType</span> <span class="o">===</span> <span class="nx">KATAKANA</span><span class="p">))</span> <span class="p">{</span>
<span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
<span class="p">}</span>
<span class="c1">// Break after any character not covered by the rules above.</span>
<span class="k">return</span> <span class="kc">true</span><span class="p">;</span>
<span class="p">}</span>
<span class="p">};</span>
<span class="nx">Unicode</span><span class="p">.</span><span class="nx">WordBreak</span> <span class="o">=</span> <span class="nx">WordBreak</span><span class="p">;</span>
</pre></div>
</div>
</div>
</div>
<div class="yui-b">
<div class="nav">
<!-- Module navigation: one link per module page (module_NAME.html).
     The item carrying class="selected" (unicode) is the module this page documents. -->
<div id="moduleList" class="module">
<h4>Modules</h4>
<ul class="content">
<li class=""><a href="module_align-plugin.html" title="align-plugin">align-plugin</a></li>
<li class=""><a href="module_anim.html" title="anim">anim</a></li>
<li class=""><a href="module_async-queue.html" title="async-queue">async-queue</a></li>
<li class=""><a href="module_attribute.html" title="attribute">attribute</a></li>
<li class=""><a href="module_autocomplete.html" title="autocomplete">autocomplete</a></li>
<li class=""><a href="module_base.html" title="base">base</a></li>
<li class=""><a href="module_cache.html" title="cache">cache</a></li>
<li class=""><a href="module_classnamemanager.html" title="classnamemanager">classnamemanager</a></li>
<li class=""><a href="module_collection.html" title="collection">collection</a></li>
<li class=""><a href="module_console.html" title="console">console</a></li>
<li class=""><a href="module_console-filters.html" title="console-filters">console-filters</a></li>
<li class=""><a href="module_cookie.html" title="cookie">cookie</a></li>
<li class=""><a href="module_dataschema.html" title="dataschema">dataschema</a></li>
<li class=""><a href="module_datasource.html" title="datasource">datasource</a></li>
<li class=""><a href="module_datatype.html" title="datatype">datatype</a></li>
<li class=""><a href="module_dd.html" title="dd">dd</a></li>
<li class=""><a href="module_dom.html" title="dom">dom</a></li>
<li class=""><a href="module_dump.html" title="dump">dump</a></li>
<li class=""><a href="module_editor.html" title="editor">editor</a></li>
<li class=""><a href="module_escape.html" title="escape">escape</a></li>
<li class=""><a href="module_event.html" title="event">event</a></li>
<li class=""><a href="module_event-custom.html" title="event-custom">event-custom</a></li>
<li class=""><a href="module_event-gestures.html" title="event-gestures">event-gestures</a></li>
<li class=""><a href="module_event-simulate.html" title="event-simulate">event-simulate</a></li>
<li class=""><a href="module_event-touch.html" title="event-touch">event-touch</a></li>
<li class=""><a href="module_event-valuechange.html" title="event-valuechange">event-valuechange</a></li>
<li class=""><a href="module_highlight.html" title="highlight">highlight</a></li>
<li class=""><a href="module_history.html" title="history">history</a></li>
<li class=""><a href="module_history-deprecated.html" title="history-deprecated">history-deprecated</a></li>
<li class=""><a href="module_imageloader.html" title="imageloader">imageloader</a></li>
<li class=""><a href="module_intl.html" title="intl">intl</a></li>
<li class=""><a href="module_io.html" title="io">io</a></li>
<li class=""><a href="module_json.html" title="json">json</a></li>
<li class=""><a href="module_jsonp.html" title="jsonp">jsonp</a></li>
<li class=""><a href="module_loader.html" title="loader">loader</a></li>
<li class=""><a href="module_node.html" title="node">node</a></li>
<li class=""><a href="module_node-flick.html" title="node-flick">node-flick</a></li>
<li class=""><a href="module_node-focusmanager.html" title="node-focusmanager">node-focusmanager</a></li>
<li class=""><a href="module_node-menunav.html" title="node-menunav">node-menunav</a></li>
<li class=""><a href="module_oop.html" title="oop">oop</a></li>
<li class=""><a href="module_overlay.html" title="overlay">overlay</a></li>
<li class=""><a href="module_plugin.html" title="plugin">plugin</a></li>
<li class=""><a href="module_pluginhost.html" title="pluginhost">pluginhost</a></li>
<li class=""><a href="module_pluginhost-base.html" title="pluginhost-base">pluginhost-base</a></li>
<li class=""><a href="module_profiler.html" title="profiler">profiler</a></li>
<li class=""><a href="module_querystring.html" title="querystring">querystring</a></li>
<li class=""><a href="module_queue-promote.html" title="queue-promote">queue-promote</a></li>
<li class=""><a href="module_resize.html" title="resize">resize</a></li>
<li class=""><a href="module_scrollview.html" title="scrollview">scrollview</a></li>
<li class=""><a href="module_scrollview-base.html" title="scrollview-base">scrollview-base</a></li>
<li class=""><a href="module_scrollview-paginator.html" title="scrollview-paginator">scrollview-paginator</a></li>
<li class=""><a href="module_scrollview-scrollbars.html" title="scrollview-scrollbars">scrollview-scrollbars</a></li>
<li class=""><a href="module_shim-plugin.html" title="shim-plugin">shim-plugin</a></li>
<li class=""><a href="module_slider.html" title="slider">slider</a></li>
<li class=""><a href="module_sortable.html" title="sortable">sortable</a></li>
<li class=""><a href="module_stylesheet.html" title="stylesheet">stylesheet</a></li>
<li class=""><a href="module_substitute.html" title="substitute">substitute</a></li>
<li class=""><a href="module_swf.html" title="swf">swf</a></li>
<li class=""><a href="module_swfdetect.html" title="swfdetect">swfdetect</a></li>
<li class=""><a href="module_tabview.html" title="tabview">tabview</a></li>
<li class=""><a href="module_test.html" title="test">test</a></li>
<li class=""><a href="module_transition.html" title="transition">transition</a></li>
<li class="selected"><a href="module_unicode.html" title="unicode">unicode</a></li>
<li class=""><a href="module_uploader.html" title="uploader">uploader</a></li>
<li class=""><a href="module_widget.html" title="widget">widget</a></li>
<li class=""><a href="module_widget-anim.html" title="widget-anim">widget-anim</a></li>
<li class=""><a href="module_widget-child.html" title="widget-child">widget-child</a></li>
<li class=""><a href="module_widget-locale.html" title="widget-locale">widget-locale</a></li>
<li class=""><a href="module_widget-parent.html" title="widget-parent">widget-parent</a></li>
<li class=""><a href="module_widget-position.html" title="widget-position">widget-position</a></li>
<li class=""><a href="module_widget-position-align.html" title="widget-position-align">widget-position-align</a></li>
<li class=""><a href="module_widget-position-constrain.html" title="widget-position-constrain">widget-position-constrain</a></li>
<li class=""><a href="module_widget-stack.html" title="widget-stack">widget-stack</a></li>
<li class=""><a href="module_widget-stdmod.html" title="widget-stdmod">widget-stdmod</a></li>
<li class=""><a href="module_yql.html" title="yql">yql</a></li>
<li class=""><a href="module_yui.html" title="yui">yui</a></li>
</ul>
</div>
<!-- Class navigation: links to the per-class API pages (ClassName.html).
     No item is marked "selected" here because this page is a source view, not a class page. -->
<div id="classList" class="module">
<h4>Classes</h4>
<ul class="content">
<li class=""><a href="Unicode.AccentFold.html" title="Unicode.AccentFold">Unicode.AccentFold</a></li>
<li class=""><a href="Unicode.Data.AccentFold.html" title="Unicode.Data.AccentFold">Unicode.Data.AccentFold</a></li>
<li class=""><a href="Unicode.Data.WordBreak.html" title="Unicode.Data.WordBreak">Unicode.Data.WordBreak</a></li>
<li class=""><a href="Unicode.WordBreak.html" title="Unicode.WordBreak">Unicode.WordBreak</a></li>
</ul>
</div>
<!-- File navigation: links to the source-view pages (FILENAME.js.html).
     The item carrying class="selected" (unicode-wordbreak.js) is the file shown on this page. -->
<div id="fileList" class="module">
<h4>Files</h4>
<ul class="content">
<li class=""><a href="unicode-accentfold.js.html" title="unicode-accentfold.js">unicode-accentfold.js</a></li>
<li class=""><a href="unicode-data-accentfold.js.html" title="unicode-data-accentfold.js">unicode-data-accentfold.js</a></li>
<li class=""><a href="unicode-data-wordbreak.js.html" title="unicode-data-wordbreak.js">unicode-data-wordbreak.js</a></li>
<li class="selected"><a href="unicode-wordbreak.js.html" title="unicode-wordbreak.js">unicode-wordbreak.js</a></li>
</ul>
</div>
</div>
</div>
</div>
<!-- Page footer: horizontal rule followed by the copyright notice. -->
<div id="ft">
<hr />
Copyright &copy; 2010 Yahoo! Inc. All rights reserved.
</div>
</div>
<script type="text/javascript">
/*
 * Index of the documented members of this module, one record per member:
 *   access - "" for a member with no access modifier listed, or "protected"
 *   host   - name of the class that owns the member
 *   name   - member name
 *   url    - link to the member's anchor on the host class page
 *   type   - kind of member (every record here is a "method")
 * Declared with `var` at script top level so it stays a global.
 * NOTE(review): presumably read by the property-search box on this page;
 * the consuming script is not visible here, so confirm before changing shape.
 */
var ALL_YUI_PROPS = [
    {"access": "", "host": "Unicode.AccentFold", "name": "canFold", "url": "Unicode.AccentFold.html#method_canFold", "type": "method"},
    {"access": "protected", "host": "Unicode.WordBreak", "name": "_classify", "url": "Unicode.WordBreak.html#method__classify", "type": "method"},
    {"access": "", "host": "Unicode.AccentFold", "name": "compare", "url": "Unicode.AccentFold.html#method_compare", "type": "method"},
    {"access": "", "host": "Unicode.AccentFold", "name": "filter", "url": "Unicode.AccentFold.html#method_filter", "type": "method"},
    {"access": "", "host": "Unicode.AccentFold", "name": "fold", "url": "Unicode.AccentFold.html#method_fold", "type": "method"},
    {"access": "", "host": "Unicode.WordBreak", "name": "getUniqueWords", "url": "Unicode.WordBreak.html#method_getUniqueWords", "type": "method"},
    {"access": "", "host": "Unicode.WordBreak", "name": "getWords", "url": "Unicode.WordBreak.html#method_getWords", "type": "method"},
    {"access": "protected", "host": "Unicode.WordBreak", "name": "_isWordBoundary", "url": "Unicode.WordBreak.html#method__isWordBoundary", "type": "method"},
    {"access": "", "host": "Unicode.WordBreak", "name": "isWordBoundary", "url": "Unicode.WordBreak.html#method_isWordBoundary", "type": "method"}
];
</script>
</body>
</html>