1056N/A<?xml version="1.0" encoding="UTF-8"?>
1056N/A<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
1276N/A<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><title>Compound Text Encoding</title><meta name="generator" content="DocBook XSL Stylesheets Vsnapshot_9276" /><style xmlns="" type="text/css">/*
1276N/A * Copyright (c) 2011 Gaetan Nadon
1276N/A * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
1276N/A *
1276N/A * Permission is hereby granted, free of charge, to any person obtaining a
1276N/A * copy of this software and associated documentation files (the "Software"),
1276N/A * to deal in the Software without restriction, including without limitation
1276N/A * the rights to use, copy, modify, merge, publish, distribute, sublicense,
1276N/A * and/or sell copies of the Software, and to permit persons to whom the
1276N/A * Software is furnished to do so, subject to the following conditions:
1276N/A *
1276N/A * The above copyright notice and this permission notice (including the next
1276N/A * paragraph) shall be included in all copies or substantial portions of the
1276N/A * Software.
1276N/A *
1276N/A * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1276N/A * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1276N/A * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1276N/A * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1276N/A * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1276N/A * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1276N/A * DEALINGS IN THE SOFTWARE.
1276N/A */
1276N/A
1276N/A/*
1276N/A * Shared stylesheet for X.Org documentation translated to HTML format
1276N/A * http://www.sagehill.net/docbookxsl/UsingCSS.html
1276N/A * http://www.w3schools.com/css/default.asp
1276N/A * https://addons.mozilla.org/en-US/firefox/addon/web-developer/developers
1276N/A * https://addons.mozilla.org/en-US/firefox/addon/font-finder/
1276N/A */
1276N/A
1276N/A/*
1276N/A * The sans-serif fonts are considered more legible on a computer screen
1276N/A * http://dry.sailingissues.com/linux-equivalents-verdana-arial.html
1276N/A *
1276N/A */
1276N/Abody {
1276N/A font-family: "Bitstream Vera Sans", "DejaVu Sans", Tahoma, Geneva, Arial, Sans-serif;
1276N/A /* In support of using "em" font size unit, the w3c recommended method */
1276N/A font-size: 100%;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Selection: all elements requiring mono spaced fonts.
1276N/A *
1276N/A * The family names attempt to match the proportionally spaced font
1276N/A * family names such that the same font name is used for both.
1276N/A * We'd like to use Bitstream, for example, in both proportionally and
1276N/A * mono spaced font text.
1276N/A */
1276N/A.command,
1276N/A.errorcode,
1276N/A.errorname,
1276N/A.errortype,
1276N/A.filename,
1276N/A.funcsynopsis,
1276N/A.function,
1276N/A.parameter,
1276N/A.programlisting,
1276N/A.property,
1276N/A.screen,
1276N/A.structname,
1276N/A.symbol,
1276N/A.synopsis,
1276N/A.type
1276N/A{
1276N/A font-family: "Bitstream Vera Sans Mono", "DejaVu Sans Mono", Courier, "Liberation Mono", Monospace;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Books have a title page, a preface, some chapters and appendices,
1276N/A * a glossary, an index and a bibliography, in that order.
1276N/A *
1276N/A * An Article has no preface and no chapters. It has sections, appendices,
1276N/A * a glossary, an index and a bibliography.
1276N/A */
1276N/A
1276N/A/*
1276N/A * Selection: book main title and subtitle
1276N/A */
1276N/Adiv.book>div.titlepage h1.title,
1276N/Adiv.book>div.titlepage h2.subtitle {
1276N/A text-align: center;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Selection: article main title and subtitle
1276N/A */
1276N/Adiv.article>div.titlepage h2.title,
1276N/Adiv.article>div.titlepage h3.subtitle,
1276N/Adiv.article>div.sect1>div.titlepage h2.title,
1276N/Adiv.article>div.section>div.titlepage h2.title {
1276N/A text-align: center;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Selection: various types of authors and collaborators, individuals or corporate
1276N/A *
1276N/A * These authors are not always contained inside an authorgroup.
1276N/A * They can be contained inside a lot of different parent types where they might
1276N/A * not be centered.
1276N/A * Reducing the margin at the bottom makes a visual separation between authors
1276N/A * We specify here the ones on the title page, others may be added based on merit.
1276N/A */
1276N/Adiv.titlepage .authorgroup,
1276N/Adiv.titlepage .author,
1276N/Adiv.titlepage .collab,
1276N/Adiv.titlepage .corpauthor,
1276N/Adiv.titlepage .corpcredit,
1276N/Adiv.titlepage .editor,
1276N/Adiv.titlepage .othercredit {
1276N/A text-align: center;
1276N/A margin-bottom: 0.25em;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Selection: the affiliation of various types of authors and collaborators,
1276N/A * individuals or corporate.
1276N/A */
1276N/Adiv.titlepage .affiliation {
1276N/A text-align: center;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Selection: product release information (X Version 11, Release 7)
1276N/A *
1276N/A * The releaseinfo element can be contained inside a lot of different parent
1276N/A * types where it might not be centered.
1276N/A * We specify here the one on the title page, others may be added based on merit.
1276N/A */
1276N/Adiv.titlepage p.releaseinfo {
1276N/A font-weight: bold;
1276N/A text-align: center;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Selection: publishing date
1276N/A */
1276N/Adiv.titlepage .pubdate {
1276N/A text-align: center;
1276N/A}
1276N/A
1276N/A/*
1276N/A * The legal notices are displayed in smaller sized fonts
1276N/A * Justification is only supported in IE and therefore not requested.
1276N/A *
1276N/A */
1276N/A.legalnotice {
1276N/A font-size: small;
1276N/A font-style: italic;
1276N/A}
1276N/A
1276N/A/*
1276N/A * For documentation having multiple licenses, the copyright and legalnotice
1276N/A * elements sequence cannot be instantiated multiple times.
1276N/A * The copyright notice and license text are therefore coded inside a legalnotice
1276N/A * element. The role attribute on the paragraph is used to allow styling of the
1276N/A * copyright notice text which should not be italicized.
1276N/A */
1276N/Ap.multiLicensing {
1276N/A font-style: normal;
1276N/A font-size: medium;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Selection: book or article main ToC title
1276N/A * A paragraph is generated for the title rather than a level 2 heading.
1276N/A * We do not want to select chapters sub table of contents, only the main one
1276N/A */
1276N/Adiv.book>div.toc>p,
1276N/Adiv.article>div.toc>p {
1276N/A font-size: 1.5em;
1276N/A text-align: center;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Selection: major sections of a book or an article
1276N/A *
1276N/A * Unlike books, articles do not have a titlepage element for appendix.
1276N/A * Using the selector "div.titlepage h2.title" would be too general.
1276N/A */
1276N/Adiv.book>div.preface>div.titlepage h2.title,
1276N/Adiv.book>div.chapter>div.titlepage h2.title,
1276N/Adiv.article>div.sect1>div.titlepage h2.title,
1276N/Adiv.article>div.section>div.titlepage h2.title,
1276N/Adiv.book>div.appendix>div.titlepage h2.title,
1276N/Adiv.article>div.appendix h2.title,
1276N/Adiv.glossary>div.titlepage h2.title,
1276N/Adiv.index>div.titlepage h2.title,
1276N/Adiv.bibliography>div.titlepage h2.title {
1276N/A /* Add a border top over the major parts, just like printed books */
1276N/A /* The Gray color is already used for the ruler over the main ToC. */
1276N/A border-top-style: solid;
1276N/A border-top-width: 2px;
1276N/A border-top-color: Gray;
1276N/A /* Put some space between the border and the title */
1276N/A padding-top: 0.2em;
1276N/A text-align: center;
1276N/A}
1276N/A
1276N/A/*
1276N/A * A Screen is a verbatim environment for displaying text that the user might
1276N/A * see on a computer terminal. It is often used to display the results of a command.
1276N/A *
1276N/A * http://www.css3.info/preview/rounded-border/
1276N/A */
1276N/A.screen {
1276N/A background: #e0ffff;
1276N/A border-width: 1px;
1276N/A border-style: solid;
1276N/A border-color: #B0C4DE;
1276N/A border-radius: 1.0em;
1276N/A /* Browser's vendor properties prior to CSS 3 */
1276N/A -moz-border-radius: 1.0em;
1276N/A -webkit-border-radius: 1.0em;
1276N/A -khtml-border-radius: 1.0em;
1276N/A margin-left: 1.0em;
1276N/A margin-right: 1.0em;
1276N/A padding: 0.5em;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Emphasize program listings with a light shade of gray similar to what
1276N/A * DocBook XSL guide does: http://www.sagehill.net/docbookxsl/ProgramListings.html
1276N/A * Found many C API docs on the web using like shades of gray.
1276N/A */
1276N/A.programlisting {
1276N/A background: #F4F4F4;
1276N/A border-width: 1px;
1276N/A border-style: solid;
1276N/A border-color: Gray;
1276N/A padding: 0.5em;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Emphasize function synopses using a darker shade of gray.
1276N/A * Add a border such that it stands out more.
1276N/A * Set the padding so the text does not touch the border.
1276N/A */
1276N/A.funcsynopsis, .synopsis {
1276N/A background: #e6e6fa;
1276N/A border-width: 1px;
1276N/A border-style: solid;
1276N/A border-color: Gray;
1276N/A clear: both;
1276N/A margin: 0.5em;
1276N/A padding: 0.25em;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Selection: paragraphs inside synopsis
1276N/A *
1276N/A * Removes the default browser margin, let the container set the padding.
1276N/A * Paragraphs are not always used in synopsis
1276N/A */
1276N/A.funcsynopsis p,
1276N/A.synopsis p {
1276N/A margin: 0;
1276N/A padding: 0;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Selection: variable lists, informal tables and tables
1276N/A *
1276N/A * Note the parameter name "variablelist.as.table" in xorg-xhtml.xsl
1276N/A * A table with rows and columns is constructed inside div.variablelist
1276N/A *
1276N/A * Set the left margin so it is indented to the right
1276N/A * Display informal tables with single line borders
1276N/A */
1276N/Atable {
1276N/A margin-left: 0.5em;
1276N/A border-collapse: collapse;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Selection: paragraphs inside tables
1276N/A *
1276N/A * Removes the default browser margin, let the container set the padding.
1276N/A * Paragraphs are not always used in tables
1276N/A */
1276N/Atd p {
1276N/A margin: 0;
1276N/A padding: 0;
1276N/A}
1276N/A
1276N/A/*
1276N/A * Add some space between the left and right column.
1276N/A * The vertical alignment helps the reader associate a term
1276N/A * with a multi-line definition.
1276N/A */
1276N/Atd, th {
1276N/A padding-left: 1.0em;
1276N/A padding-right: 1.0em;
1276N/A vertical-align: top;
1276N/A}
1276N/A
1276N/A.warning {
1276N/A border: 1px solid red;
1276N/A background: #FFFF66;
1276N/A padding-left: 0.5em;
1276N/A}
1276N/A</style></head><body><div class="article"><div class="titlepage"><div><div><h2 class="title"><a id="ctext"></a>Compound Text Encoding</h2></div><div><h3 class="subtitle"><em>X Consortium Standard</em></h3></div><div><div class="authorgroup"><div class="author"><h3 class="author"><span class="firstname">Robert</span> <span class="othername">W.</span> <span class="surname">Scheifler</span></h3><div class="affiliation"><span class="orgname">X Consortium<br /></span></div></div></div></div><div><p class="releaseinfo">X Version 11, Release 7.7</p></div><div><p class="releaseinfo">Version 1.1</p></div><div><p class="copyright">Copyright © 1989 X Consortium</p></div><div><div class="legalnotice"><a id="id2525274"></a><p>
1056N/APermission is hereby granted, free of charge, to any person obtaining a copy
1056N/Aof this software and associated documentation files (the "Software"), to deal
1056N/Ain the Software without restriction, including without limitation the rights
1056N/Ato use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1056N/Acopies of the Software, and to permit persons to whom the Software is
1056N/Afurnished to do so, subject to the following conditions:
1056N/A</p><p>
1056N/AThe above copyright notice and this permission notice shall be included in
1056N/Aall copies or substantial portions of the Software.
1056N/A</p><p>
1276N/ATHE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1056N/AIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1056N/AFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1056N/AX CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
1056N/AAN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
1056N/ACONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1056N/A</p><p>
1056N/AExcept as contained in this notice, the name of the X Consortium shall not be
1056N/Aused in advertising or otherwise to promote the sale, use or other dealings
1056N/Ain this Software without prior written authorization from the X Consortium.
1276N/A</p><p>X Window System is a trademark of The Open Group.</p></div></div></div><hr /></div><div class="toc"><p><strong>Table of Contents</strong></p><dl><dt><span class="sect1"><a href="#Overview">Overview</a></span></dt><dt><span class="sect1"><a href="#Values">Values</a></span></dt><dt><span class="sect1"><a href="#Control_Characters">Control Characters</a></span></dt><dt><span class="sect1"><a href="#Standard_Character_Set_Encodings">Standard Character Set Encodings</a></span></dt><dt><span class="sect1"><a href="#Approved_Standard_Encodings">Approved Standard Encodings</a></span></dt><dt><span class="sect1"><a href="#Non_Standard_Character_Set_Encodings">Non-Standard Character Set Encodings</a></span></dt><dt><span class="sect1"><a href="#Directionality">Directionality</a></span></dt><dt><span class="sect1"><a href="#Resources">Resources</a></span></dt><dt><span class="sect1"><a href="#Font_Names">Font Names</a></span></dt><dt><span class="sect1"><a href="#Extensions">Extensions</a></span></dt><dt><span class="sect1"><a href="#Errors">Errors</a></span></dt></dl></div><div class="sect1"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="Overview"></a>Overview</h2></div></div></div><p>
1056N/ACompound Text is a format for multiple character set data, such as
1056N/Amulti-lingual text. The format is based on ISO
1056N/Astandards for encoding and combining character sets. Compound Text is intended
1056N/Ato be used in three main contexts: inter-client communication using selections,
1056N/Aas defined in the
1056N/A<span class="emphasis"><em>Inter-Client Communication Conventions Manual</em></span>
1056N/A(ICCCM);
1056N/Awindow properties (e.g., window manager hints as defined in the ICCCM);
1056N/Aand resources (e.g., as defined in Xlib and the Xt Intrinsics).
1056N/A</p><p>
1056N/ACompound Text is intended as an external representation, or interchange format,
1056N/Anot as an internal representation. It is expected (but not required) that
1056N/Aclients will convert Compound Text to some internal representation for
1056N/Aprocessing and rendering, and convert from that internal representation to
1056N/ACompound Text when providing textual data to another client.
1276N/A</p></div><div class="sect1"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="Values"></a>Values</h2></div></div></div><p>
1056N/A
1056N/AThe name of this encoding is "COMPOUND_TEXT". When text values are used in
1056N/Athe ICCCM-compliant selection mechanism or are stored as window properties in
1056N/Athe server, the type used should be the atom for "COMPOUND_TEXT".
1056N/A</p><p>
1056N/A
1056N/AOctet values are represented in this document as two decimal numbers in the
1056N/Aform col/row. This means the value (col * 16) + row. For example, 02/01 means
1056N/Athe value 33.
1056N/A</p><p>
1056N/AFor our purposes, the octet encoding space is divided into four ranges:
1276N/A</p><div class="informaltable"><table border="0"><colgroup><col align="left" class="c1" /><col align="left" class="c2" /></colgroup><tbody><tr><td align="left">C0</td><td align="left">octets from 00/00 to 01/15</td></tr><tr><td align="left">GL</td><td align="left">octets from 02/00 to 07/15</td></tr><tr><td align="left">C1</td><td align="left">octets from 08/00 to 09/15</td></tr><tr><td align="left">GR</td><td align="left">octets from 10/00 to 15/15</td></tr></tbody></table></div><p>
1056N/A
1056N/AC0 and C1 are "control character" sets, while GL and GR are "graphic
1056N/Acharacter" sets. Only a subset of C0 and C1 octets are used in the encoding,
1056N/Aand depending on the character set encoding defined as GL or GR, a subset of
1056N/AGL and GR octets may be used; see below for details. All octets (00/00 to
1056N/A15/15) may appear inside the text of extended segments (defined below).
1056N/A</p><p>
1056N/A
1056N/A[For those familiar with ISO 2022, we will use only an 8-bit environment, and
1056N/Awe will always use G0 for GL and G1 for GR.]
1276N/A</p></div><div class="sect1"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="Control_Characters"></a>Control Characters</h2></div></div></div><p>
1056N/AIn C0, only the following values will be used:
1276N/A</p><div class="informaltable"><table border="0"><colgroup><col align="left" class="c1" /><col align="left" class="c2" /><col align="left" class="c3" /></colgroup><tbody><tr><td align="left">00/09</td><td align="left">HT</td><td align="left">HORIZONTAL TABULATION</td></tr><tr><td align="left">00/10</td><td align="left">NL</td><td align="left">NEW LINE</td></tr><tr><td align="left">01/11</td><td align="left">ESC</td><td align="left">(ESCAPE)</td></tr></tbody></table></div><p>
1056N/AIn C1, only the following value will be used:
1276N/A</p><div class="informaltable"><table border="0"><colgroup><col align="left" class="c1" /><col align="left" class="c2" /><col align="left" class="c3" /></colgroup><tbody><tr><td align="left">09/11</td><td align="left">CSI</td><td align="left">CONTROL SEQUENCE INTRODUCER</td></tr></tbody></table></div><p>
1056N/A
1056N/A[The alternate 7-bit CSI encoding 01/11 05/11 is not used in Compound Text.]
1056N/A</p><p>
1056N/A
1056N/ANo control sequences are defined in Compound Text for changing the C0 and C1
1056N/Asets.
1056N/A</p><p>
1056N/A
1056N/AA horizontal tab can be represented with the octet 00/09. Specification of
1056N/Atabulation width settings is not part of Compound Text and must be obtained
1056N/Afrom context (in an unspecified manner).
1056N/A</p><p>
1056N/A
1056N/A[Inclusion of horizontal tab is for consistency with the STRING type currently
1056N/Adefined in the ICCCM.]
1056N/A</p><p>
1056N/A
1056N/AA newline (line separator/terminator) can be represented with the octet 00/10.
1056N/A</p><p>
1056N/A
1056N/A[Note that 00/10 is normally LINEFEED, but is being interpreted as NEWLINE.
1056N/AThis can be thought of as using the (deprecated) NEW LINE mode, E.1.3, in ISO
1056N/A6429. Use of this value instead of 08/05 (NEL, NEXT LINE) is for consistency
1056N/Awith the STRING type currently defined in the ICCCM.]
1056N/A</p><p>
1056N/A
1056N/AThe remaining C0 and C1 values (01/11 and 09/11) are only used in the control
1056N/Asequences defined below.
1276N/A</p></div><div class="sect1"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="Standard_Character_Set_Encodings"></a>Standard Character Set Encodings</h2></div></div></div><p>
1056N/A
1056N/AThe default GL and GR sets in Compound Text correspond to the left and right
1056N/Ahalves of ISO 8859-1 (Latin 1). As such, any legal instance of a STRING type
1056N/A(as defined in the ICCCM) is also a legal instance of type COMPOUND_TEXT.
1056N/A</p><p>
1056N/A[The implied initial state in ISO 2022 is defined with the sequence:
1056N/A 01/11 02/00 04/03 G0 and G1 in an 8-bit environment only. Designation also invokes.
1056N/A 01/11 02/00 04/07 In an 8-bit environment, C1 represented as 8-bits.
1056N/A 01/11 02/00 04/09 Graphic character sets can be 94 or 96.
1056N/A 01/11 02/00 04/11 8-bit code is used.
1056N/A 01/11 02/08 04/02 Designate ASCII into G0.
1056N/A 01/11 02/13 04/01 Designate right-hand part of ISO Latin-1 into G1.
1056N/A]
1056N/A</p><p>
1056N/ATo define one of the approved standard character set encodings to be
1056N/Athe GL set, one of the following control sequences is used:
1276N/A</p><div class="informaltable"><table border="0"><colgroup><col align="left" class="c1" /><col align="left" class="c2" /><col align="left" class="c3" /><col align="left" class="c4" /></colgroup><tbody><tr><td align="left">01/11</td><td align="left">02/08</td><td align="left">{I} F</td><td align="left">94 character set</td></tr><tr><td align="left">01/11</td><td align="left">02/04</td><td align="left">02/08 {I} F</td><td align="left">94<sup>N</sup> character set</td></tr></tbody></table></div><p>
1056N/A
1056N/ATo define one of the approved standard character set encodings to be
1056N/Athe GR set, one of the following control sequences is used:
1276N/A</p><div class="informaltable"><table border="0"><colgroup><col align="left" class="c1" /><col align="left" class="c2" /><col align="left" class="c3" /><col align="left" class="c4" /></colgroup><tbody><tr><td align="left">01/11</td><td align="left">02/09</td><td align="left">{I} F</td><td align="left">94 character set</td></tr><tr><td align="left">01/11</td><td align="left">02/13</td><td align="left">{I} F</td><td align="left">96 character set</td></tr><tr><td align="left">01/11</td><td align="left">02/04</td><td align="left">02/09 {I} F</td><td align="left">94<sup>N</sup> character set</td></tr></tbody></table></div><p>
1056N/A
1056N/AThe "F" in the control sequences above stands for "Final character", which
1056N/Ais always in the range 04/00 to 07/14. The "{I}" stands for zero or more
1056N/A"intermediate characters", which are always in the range 02/00 to 02/15, with
1056N/Athe first intermediate character always in the range 02/01 to 02/03. The
1056N/Aregistration authority has defined an "{I} F" sequence for each registered
1056N/Acharacter set encoding.
1056N/A</p><p>
1056N/A
1056N/A[Final characters for private encodings (in the range 03/00 to 03/15) are not
1056N/Apermitted here in Compound Text.]
1056N/A</p><p>
1056N/A
1056N/AFor GL, octet 02/00 is always defined as SPACE, and octet 07/15 (normally
1056N/ADELETE) is never used. For a 94-character set defined as GR, octets 10/00 and
1056N/A15/15 are never used.
1056N/A</p><p>
1056N/A
1056N/A[This is consistent with ISO 2022.]
1056N/A</p><p>
1056N/A
1056N/AA 94<sup>N</sup> character set uses N octets (N &gt; 1) for each character.
1056N/AThe value of N is derived from the column value for F:
1276N/A</p><div class="informaltable"><table border="0"><colgroup><col align="left" class="c1" /><col align="left" class="c2" /></colgroup><tbody><tr><td align="left">column 04 or 05</td><td align="left">2 octets</td></tr><tr><td align="left">column 06</td><td align="left">3 octets</td></tr><tr><td align="left">column 07</td><td align="left">4 or more octets</td></tr></tbody></table></div><p>
1056N/A
1056N/AIn a 94<sup>N</sup> encoding, the octet values 02/00 and 07/15 (in GL) and
1056N/A10/00 and 15/15 (in GR) are never used.
1056N/A</p><p>
1056N/A
1056N/A[The column definitions come from ISO 2022.]
1056N/A</p><p>
1056N/A
1056N/AOnce a GL or GR set has been defined, all further octets in that range (except
1056N/Awithin control sequences and extended segments) are interpreted with respect to
1056N/Athat character set encoding, until the GL or GR set is redefined. GL and GR
1056N/Asets can be defined independently; they do not have to be defined in pairs.
1056N/A</p><p>
1056N/A
1056N/ANote that when actually using a character set encoding as the GR set, you must
1056N/Aforce the most significant bit (08/00) of each octet to be a one, so that it
1056N/Afalls in the range 10/00 to 15/15.
1056N/A</p><p>
1056N/A
1056N/A[Control sequences to specify character set encoding revisions (as in section
1056N/A6.3.13 of ISO 2022) are not used in Compound Text. Revision indicators do not
1056N/Aappear to provide useful information in the context of Compound Text. The most
1056N/Arecent revision can always be assumed, since revisions are upward compatible.]
1276N/A</p></div><div class="sect1"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="Approved_Standard_Encodings"></a>Approved Standard Encodings</h2></div></div></div><p>
1056N/AThe following are the approved standard encodings to be used with Compound
1056N/AText. Note that none have Intermediate characters; however, a good parser will
1056N/Astill deal with Intermediate characters in the event that additional encodings
1056N/Aare later added to this list.
1276N/A</p><div class="informaltable"><table border="1"><colgroup><col align="left" class="c1" /><col align="left" class="c2" /><col align="left" class="c3" /></colgroup><thead><tr><th align="left">{I} F</th><th align="left">94/96</th><th align="left">Description</th></tr></thead><tbody><tr><td align="left">04/02</td><td align="left">94</td><td align="left">
1056N/A7-bit ASCII graphics (ANSI X3.4-1968), Left half of ISO 8859 sets
1056N/A </td></tr><tr><td align="left">04/09</td><td align="left">94</td><td align="left">
1056N/ARight half of JIS X0201-1976 (reaffirmed 1984),
1056N/A8-Bit Alphanumeric-Katakana Code
1056N/A </td></tr><tr><td align="left">04/10</td><td align="left">94</td><td align="left">
1056N/ALeft half of JIS X0201-1976 (reaffirmed 1984),
1056N/A8-Bit Alphanumeric-Katakana Code
1056N/A </td></tr><tr><td align="left">04/01</td><td align="left">96</td><td align="left">Right half of ISO 8859-1, Latin alphabet No. 1</td></tr><tr><td align="left">04/02</td><td align="left">96</td><td align="left">Right half of ISO 8859-2, Latin alphabet No. 2</td></tr><tr><td align="left">04/03</td><td align="left">96</td><td align="left">Right half of ISO 8859-3, Latin alphabet No. 3</td></tr><tr><td align="left">04/04</td><td align="left">96</td><td align="left">Right half of ISO 8859-4, Latin alphabet No. 4</td></tr><tr><td align="left">04/06</td><td align="left">96</td><td align="left">Right half of ISO 8859-7, Latin/Greek alphabet</td></tr><tr><td align="left">04/07</td><td align="left">96</td><td align="left">Right half of ISO 8859-6, Latin/Arabic alphabet</td></tr><tr><td align="left">04/08</td><td align="left">96</td><td align="left">Right half of ISO 8859-8, Latin/Hebrew alphabet</td></tr><tr><td align="left">04/12</td><td align="left">96</td><td align="left">Right half of ISO 8859-5, Latin/Cyrillic alphabet</td></tr><tr><td align="left">04/13</td><td align="left">96</td><td align="left">Right half of ISO 8859-9, Latin alphabet No. 5</td></tr><tr><td align="left">04/01</td><td align="left">94<sup>2</sup></td><td align="left">GB2312-1980, China (PRC) Hanzi</td></tr><tr><td align="left">04/02</td><td align="left">94<sup>2</sup></td><td align="left">JIS X0208-1983, Japanese Graphic Character Set</td></tr><tr><td align="left">04/03</td><td align="left">94<sup>2</sup></td><td align="left">KS C5601-1987, Korean Graphic Character Set</td></tr></tbody></table></div><p>
1056N/A
1056N/AThe sets listed as "Left half of ..." should always be defined as GL. The
1056N/Asets listed as "Right half of ..." should always be defined as GR. Other
1056N/Asets can be defined either as GL or GR.
1276N/A</p></div><div class="sect1"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="Non_Standard_Character_Set_Encodings"></a>Non-Standard Character Set Encodings</h2></div></div></div><p>
1056N/ACharacter set encodings that are not in the list of approved standard
1056N/Aencodings can be included
1056N/Ausing "extended segments". An extended segment begins with one of the
1056N/Afollowing sequences:
1276N/A</p><div class="informaltable"><table border="0"><colgroup><col align="left" class="c1" /><col align="left" class="c2" /></colgroup><tbody><tr><td align="left">01/11 02/05 02/15 03/00 M L</td><td align="left">variable number of octets per character</td></tr><tr><td align="left">01/11 02/05 02/15 03/01 M L</td><td align="left">1 octet per character</td></tr><tr><td align="left">01/11 02/05 02/15 03/02 M L</td><td align="left">2 octets per character</td></tr><tr><td align="left">01/11 02/05 02/15 03/03 M L</td><td align="left">3 octets per character</td></tr><tr><td align="left">01/11 02/05 02/15 03/04 M L</td><td align="left">4 octets per character</td></tr></tbody></table></div><p>
1056N/A[This uses the "other coding system" of ISO 2022, using private Final
1056N/Acharacters.]
1056N/A</p><p>
1056N/A
1056N/AThe "M" and "L" octets represent a 14-bit unsigned value giving the number
1056N/Aof octets that appear in the remainder of the segment. The number is computed
1056N/Aas ((M - 128) * 128) + (L - 128). The most significant bit of M and L is always
1056N/Aset to one. The remainder of the segment consists of two parts, the name of
1056N/Athe character set encoding and the actual text. The name of the encoding comes
1056N/Afirst and is separated from the text by the octet 00/02 (STX, START OF TEXT).
1056N/ANote that the length defined by M and L includes the encoding name and
1056N/Aseparator.
1056N/A</p><p>
1056N/A
1056N/A[The encoding of the length is chosen to avoid having zero octets in Compound
1056N/AText when possible, because embedded NUL values are problematic in many C
1056N/Alanguage routines. The use of zero octets cannot be ruled out entirely
1056N/Ahowever, since some octets in the actual text of the extended segment may have
1056N/Ato be zero.]
1056N/A</p><p>
1056N/A
1056N/AThe name of the encoding should be registered with the X Consortium to avoid
1056N/Aconflicts and should when appropriate match the CharSet Registry and Encoding
1056N/Aregistration used in the X Logical Font Description. The name itself should be
1056N/Aencoded using ISO 8859-1 (Latin 1), should not use question mark (03/15) or
1056N/Aasterisk (02/10), and should use hyphen (02/13) only in accordance with the X
1056N/ALogical Font Description.
1056N/A</p><p>
1056N/A
1056N/AExtended segments are not to be used for any character set encoding that can
1056N/Abe constructed from a GL/GR pair of approved standard encodings. For
1056N/Aexample, it is incorrect to use an extended segment for any of the ISO 8859
1056N/Afamily of encodings.
1056N/A</p><p>
1056N/A
1056N/AIt should be noted that the contents of an extended segment are arbitrary;
1056N/Afor example,
1056N/Athey may contain octets in the C0 and C1 ranges, including 00/00, and
1056N/Aoctets comprising a given character may differ in their most significant bit.
1056N/A</p><p>
1056N/A
1056N/A[ISO-registered "other coding systems" are not used in Compound Text;
1056N/Aextended segments are the only mechanism for non-2022 encodings.]
1276N/A</p></div><div class="sect1"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="Directionality"></a>Directionality</h2></div></div></div><p>
1056N/A
1056N/AIf desired, horizontal text direction can be indicated using the following
1056N/Acontrol sequences:
1276N/A</p><div class="informaltable"><table border="0"><colgroup><col align="left" class="c1" /><col align="left" class="c2" /></colgroup><tbody><tr><td align="left">09/11 03/01 05/13</td><td align="left">begin left-to-right text</td></tr><tr><td align="left">09/11 03/02 05/13</td><td align="left">begin right-to-left text</td></tr><tr><td align="left">09/11 05/13</td><td align="left">end of string</td></tr></tbody></table></div><p>
1056N/A
1056N/A[This is a subset of the SDS (START DIRECTED STRING) control in the Draft
1056N/ABidirectional Addendum to ISO 6429.]
1056N/A</p><p>
1056N/A
1056N/ADirectionality can be nested. Logically, a stack of directions is maintained.
1056N/AEach of the first two control sequences pushes a new direction on the stack,
1056N/Aand the third sequence (revert) pops a direction from the stack. The stack
1056N/Astarts out empty at the beginning of a Compound Text string. When the stack is
1056N/Aempty, the directionality of the text is unspecified.
1056N/A</p><p>
1056N/A
1056N/ADirectionality applies to all subsequent text, whether in GL, GR, or an
1056N/Aextended segment. If the desired directionality of GL, GR, or extended
1056N/Asegments differs, then directionality control sequences must be inserted when
1056N/Aswitching between them.
1056N/A</p><p>
1056N/A
1056N/ANote that definition of GL and GR sets is independent of directionality;
1056N/Adefining a new GL or GR set does not change the current directionality, and
1056N/Apushing or popping a directionality does not change the current GL and GR
1056N/Adefinitions.
1056N/A</p><p>
1056N/A
1056N/ASpecification of directionality is entirely optional; text direction should be
1056N/Aclear from context in most cases. However, it must be the case that either
1056N/Aall characters in a Compound Text string have explicitly specified direction
1056N/Aor that all characters have unspecified direction. That is, if directionality
1056N/Acontrol sequences are used, the first such control sequence must precede the
1056N/Afirst graphic character in a Compound Text string, and graphic characters are
1056N/Anot permitted whenever the directionality stack is empty.
1276N/A</p></div><div class="sect1"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="Resources"></a>Resources</h2></div></div></div><p>
1056N/A
1056N/ATo use Compound Text in a resource, you can simply treat all octets as if they
1056N/Awere ASCII/Latin-1 and just replace all "\" octets (05/12) with the two
1056N/Aoctets "\\", all newline octets (00/10) with the two octets "\n", and
1056N/Aall zero octets with the four octets "\000".
1056N/AIt is up to the client making use of the resource to interpret the data as
1056N/ACompound Text; the policy by which this is ascertained is not constrained by
1056N/Athe Compound Text specification.
1276N/A</p></div><div class="sect1"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="Font_Names"></a>Font Names</h2></div></div></div><p>
1056N/AThe following CharSet names for the standard character set encodings are
1056N/Aregistered for use in font names under the X Logical Font Description:
1276N/A</p><div class="informaltable"><table border="1"><colgroup><col align="left" class="c1" /><col align="left" class="c2" /><col align="left" class="c3" /></colgroup><thead><tr><th align="left">Name</th><th align="left">Encoding Standard</th><th align="left">Description</th></tr></thead><tbody><tr><td align="left">ISO8859-1</td><td align="left">ISO 8859-1</td><td align="left">Latin alphabet No. 1</td></tr><tr><td align="left">ISO8859-2</td><td align="left">ISO 8859-2</td><td align="left">Latin alphabet No. 2</td></tr><tr><td align="left">ISO8859-3</td><td align="left">ISO 8859-3</td><td align="left">Latin alphabet No. 3</td></tr><tr><td align="left">ISO8859-4</td><td align="left">ISO 8859-4</td><td align="left">Latin alphabet No. 4</td></tr><tr><td align="left">ISO8859-5</td><td align="left">ISO 8859-5</td><td align="left">Latin/Cyrillic alphabet</td></tr><tr><td align="left">ISO8859-6</td><td align="left">ISO 8859-6</td><td align="left">Latin/Arabic alphabet</td></tr><tr><td align="left">ISO8859-7</td><td align="left">ISO 8859-7</td><td align="left">Latin/Greek alphabet</td></tr><tr><td align="left">ISO8859-8</td><td align="left">ISO 8859-8</td><td align="left">Latin/Hebrew alphabet</td></tr><tr><td align="left">ISO8859-9</td><td align="left">ISO 8859-9</td><td align="left">Latin alphabet No. 
5</td></tr><tr><td align="left">JISX0201.1976-0</td><td align="left">JIS X0201-1976 (reaffirmed 1984)</td><td align="left">8-bit Alphanumeric-Katakana Code</td></tr><tr><td align="left">GB2312.1980-0</td><td align="left">GB2312-1980, GL encoding</td><td align="left">China (PRC) Hanzi</td></tr><tr><td align="left">JISX0208.1983-0</td><td align="left">JIS X0208-1983, GL encoding</td><td align="left">Japanese Graphic Character Set</td></tr><tr><td align="left">KSC5601.1987-0</td><td align="left">KS C5601-1987, GL encoding</td><td align="left">Korean Graphic Character Set</td></tr></tbody></table></div></div><div class="sect1"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="Extensions"></a>Extensions</h2></div></div></div><p>
1056N/A
1056N/AThere is no absolute requirement for a parser to deal with anything but the
1056N/Aparticular encoding syntax defined in this specification. However, it is
1056N/Apossible that Compound Text may be extended in the future, and as such it may
1056N/Abe desirable to construct the parser to handle 2022/6429 syntax more generally.
1056N/A</p><p>
1056N/A
1056N/AThere are two general formats covering all control sequences that are expected
1056N/Ato appear in extensions:
1056N/A</p><p>
1056N/A01/11 {I} F
1056N/A</p><p>
1056N/AFor this format, I is always in the range 02/00 to 02/15, and F is always
1056N/Ain the range 03/00 to 07/14.
1056N/A</p><p>
1056N/A09/11 {P} {I} F
1056N/A</p><p>
1056N/AFor this format, P is always in the range 03/00 to 03/15, I is always in
1056N/Athe range 02/00 to 02/15, and F is always in the range 04/00 to 07/14.
1056N/A</p><p>
1056N/A
1056N/AIn addition, new (singleton) control characters (in the C0 and C1 ranges) might
1056N/Abe defined in the future.
1056N/A</p><p>
1056N/A
1056N/AFinally, new kinds of "segments" might be defined in the future using syntax
1056N/Asimilar to extended segments:
1056N/A</p><p>
1056N/A01/11 02/05 02/15 F M L
1056N/A</p><p>
1056N/AFor this format, F is in the range 03/05 to 03/15. M and L are as defined
1056N/Ain extended segments. Such a segment will always be followed by the number
1056N/Aof octets defined by M and L. These octets can have arbitrary values and
1056N/Aneed not follow the internal structure defined for current extended
1056N/Asegments.
1056N/A</p><p>
1056N/A
1056N/AIf extensions to this specification are defined in the future, then any string
1056N/Aincorporating instances of such extensions must start with one of the following
1056N/Acontrol sequences:
1276N/A</p><div class="informaltable"><table border="0"><colgroup><col align="left" class="c1" /><col align="left" class="c2" /></colgroup><tbody><tr><td align="left">01/11 02/03 V 03/00</td><td align="left">ignoring extensions is OK</td></tr><tr><td align="left">01/11 02/03 V 03/01</td><td align="left">ignoring extensions is not OK</td></tr></tbody></table></div><p>
1056N/A
1056N/AIn either case, V is in the range 02/00 to 02/15 and indicates the major
1056N/Aversion
1056N/Aminus one of the specification being used. These version control sequences are
1056N/Afor use by clients that implement earlier versions, but have implemented a
1056N/Ageneral parser. The first control sequence indicates that it is acceptable to
1056N/Aignore all extension control sequences; no mandatory information will be lost
1056N/Ain the process. The second control sequence indicates that it is unacceptable
1056N/Ato ignore any extension control sequences; mandatory information would be lost
1056N/Ain the process. In general, it will be up to the client generating the
1056N/ACompound Text to decide which control sequence to use.
1276N/A</p></div><div class="sect1"><div class="titlepage"><div><div><h2 class="title" style="clear: both"><a id="Errors"></a>Errors</h2></div></div></div><p>
1056N/A
1056N/AIf a Compound Text string does not match the specification here (e.g., uses
1056N/Aundefined control characters, or undefined control sequences, or incorrectly
1056N/Aformatted extended segments), it is best to treat the entire string as invalid,
1056N/Aexcept as indicated by a version control sequence.
1056N/A</p></div></div></body></html>