// ExtendEntityManager.cxx revision 7c478bd95313f5f23a4c958a745db2134aa03244
// Copyright (c) 1994, 1995, 1996 James Clark
// See the file COPYING for copying permission.
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __GNUG__
#pragma implementation
#endif
#include "splib.h"
#include "ExtendEntityManager.h"
#include "Message.h"
#include "MessageArg.h"
#include "OffsetOrderedList.h"
#include "rtti.h"
#include "StorageManager.h"
#include "Vector.h"
#include "NCVector.h"
#include "Owner.h"
#include "constant.h"
#include "EntityManagerMessages.h"
#include "StorageObjectPosition.h"
#include "Owner.h"
#include "CodingSystem.h"
#include "CodingSystemKit.h"
#include "InputSource.h"
#include "Mutex.h"
#include "macros.h"
#include "EntityCatalog.h"
#include "CharMap.h"
#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdio.h>
#ifdef DECLARE_MEMMOVE
extern "C" {
void *memmove(void *, const void *, size_t);
}
#endif
#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif
// End-of-file marker byte.  ExternalInputSource::fill() leaves it out of the
// decoded data when it is the last byte of a read and the storage object
// spec has zapEof set.
const char EOFCHAR = '\032'; // Control-Z
// Declared early; defined further down in this file.
class ExternalInputSource;
// Concrete ExtendEntityManager.  It holds a default storage manager, any
// additionally registered storage managers, a coding system kit and a
// catalog manager, and uses FSIParser to turn system identifier strings into
// ParsedSystemId objects.
class EntityManagerImpl : public ExtendEntityManager {
public:
EntityManagerImpl(StorageManager *defaultStorageManager,
const InputCodingSystem *defaultCodingSystem,
const ConstPtr<InputCodingSystemKit> &,
Boolean internalCharsetIsDocCharset);
void setCatalogManager(CatalogManager *catalogManager);
void registerStorageManager(StorageManager *);
// Parses sysid, maps it through the catalog manager and returns a new
// ExternalInputSource, or 0 if parsing or catalog mapping fails.
InputSource *open(const StringC &sysid,
const CharsetInfo &,
InputSourceOrigin *,
unsigned flags,
Messenger &);
// The system character set of the coding system kit.
const CharsetInfo &charset() const;
Boolean internalCharsetIsDocCharset() const;
ConstPtr<EntityCatalog> makeCatalog(StringC &systemId,
const CharsetInfo &charset,
Messenger &mgr);
// Parses a system identifier relative to a defining location and writes
// its unparsed (canonical) form to the final StringC argument.
Boolean expandSystemId(const StringC &,
const Location &,
Boolean isNdata,
const CharsetInfo &,
const StringC *,
Messenger &,
StringC &);
// Parses each system identifier in turn into one ParsedSystemId and
// unparses the combined result.
Boolean mergeSystemIds(const Vector<StringC> &,
Boolean mapCatalogDocument,
const CharsetInfo &,
Messenger &mgr,
StringC &) const;
// Lookup by type name; matching ignores case (see matchKey).
StorageManager *lookupStorageType(const StringC &, const CharsetInfo &) const;
// Lookup by pointer identity of the type string.
StorageManager *lookupStorageType(const char *) const;
// Returns the first storage manager whose guessIsId() accepts the string.
StorageManager *guessStorageType(const StringC &, const CharsetInfo &) const;
const InputCodingSystem *lookupCodingSystem(const StringC &,
const CharsetInfo &,
Boolean isBctf,
const char *&) const;
// Declared here, but no definition appears in the visible part of the file.
Boolean resolveSystemId(const StringC &str,
const CharsetInfo &idCharset,
Messenger &mgr,
const Location &defLocation,
Boolean isNdata,
ParsedSystemId &parsedSysid) const;
Boolean parseSystemId(const StringC &str,
const CharsetInfo &idCharset,
Boolean isNdata,
const StorageObjectLocation *def,
Messenger &mgr,
ParsedSystemId &parsedSysid) const;
// Selects the character set used internally: the document character set
// when internalCharsetIsDocCharset_ is set, otherwise the system one.
const CharsetInfo &internalCharset(const CharsetInfo &docCharset) const {
if (internalCharsetIsDocCharset_)
return docCharset;
else
return charset();
}
private:
EntityManagerImpl(const EntityManagerImpl &); // undefined
void operator=(const EntityManagerImpl &); // undefined
// Walks up from a Location to the nearest origin that has ExternalInfo and
// converts the position into a StorageObjectLocation.
static Boolean defLocation(const Location &, StorageObjectLocation &);
// Case-insensitive comparison of a StringC against a C string.
static Boolean matchKey(const StringC &type, const char *s,
const CharsetInfo &internalCharset);
NCVector<Owner<StorageManager> > storageManagers_;
Owner<StorageManager> defaultStorageManager_;
const InputCodingSystem *defaultCodingSystem_;
Owner<CatalogManager> catalogManager_;
Boolean internalCharsetIsDocCharset_;
ConstPtr<InputCodingSystemKit> codingSystemKit_;
friend class FSIParser;
};
// ExternalInfo implementation that records, for each storage object of a
// parsed system identifier, its id, decoder, end offset and record-start
// bookkeeping, so that an Offset in the input can later be converted back
// into a StorageObjectLocation.
class ExternalInfoImpl : public ExternalInfo {
RTTI_CLASS
public:
// Takes over the contents of parsedSysid by swapping.
ExternalInfoImpl(ParsedSystemId &parsedSysid);
const StorageObjectSpec &spec(size_t i) const;
size_t nSpecs() const;
const ParsedSystemId &parsedSystemId() const;
// Records a record start at the given offset.
void noteRS(Offset);
// Records where the current storage object ends and moves to the next.
void noteStorageObjectEnd(Offset);
// Marks the current storage object as having RSs inserted by the reader.
void noteInsertedRSs();
void setDecoder(size_t i, Decoder *);
// Swaps the given string into the stored id for storage object i.
void setId(size_t i, StringC &);
void getId(size_t i, StringC &) const;
Boolean convertOffset(Offset, StorageObjectLocation &) const;
private:
ParsedSystemId parsedSysid_;
// One entry per storage object in parsedSysid_.
NCVector<StorageObjectPosition> position_;
// Index of the storage object currently being read.
size_t currentIndex_;
// list of inserted RSs
OffsetOrderedList rsList_;
// When set, noteRS() does not append to rsList_.
Boolean notrack_;
Mutex mutex_;
};
// InputSource that reads the storage objects named by a ParsedSystemId one
// after another, decodes their bytes into Chars, and normalizes record
// boundaries according to each storage object's records setting.
class ExternalInputSource : public InputSource {
public:
ExternalInputSource(ParsedSystemId &parsedSysid,
const CharsetInfo &internalCharset,
const CharsetInfo &docCharset,
Boolean internalCharsetIsDocCharset,
Char replacementChar,
InputSourceOrigin *origin,
unsigned flags);
void pushCharRef(Char, const NamedCharRef &);
~ExternalInputSource();
private:
Xchar fill(Messenger &);
Boolean rewind(Messenger &);
void willNotRewind();
void setDocCharset(const CharsetInfo &, const CharsetInfo &);
void willNotSetDocCharset();
// Resets the buffer and reading state to "nothing read yet".
void init();
// Record a record start at cur() / at the given buffer position.
void noteRS();
void noteRSAt(const Char *);
void reallocateBuffer(size_t size);
// Inserts a character at cur(), shifting buffer contents as needed.
void insertChar(Char);
void buildMap(const CharsetInfo &internalCharset,
const CharsetInfo &docCharset);
void buildMap1(const CharsetInfo &, const CharsetInfo &);
// Linear searches in [start, end); each returns 0 when nothing is found.
static const Char *findNextCr(const Char *start, const Char *end);
static const Char *findNextLf(const Char *start, const Char *end);
static const Char *findNextCrOrLf(const Char *start, const Char *end);
ExternalInfoImpl *info_;
// Buffer holding decoded Chars (and, at its tail, raw bytes being read).
Char *buf_;
// One past the last decoded Char in buf_.
const Char *bufLim_;
// Input offset corresponding to bufLim_.
Offset bufLimOffset_;
// Capacity of buf_ in Chars.
size_t bufSize_;
// Number of bytes requested per read; taken from getBlockSize().
size_t readSize_;
// One (lazily created) storage object per spec in the system id.
NCVector<Owner<StorageObject> > sov_;
// Storage object currently being read, or 0.
StorageObject *so_;
// Index of the next storage object to open.
size_t soIndex_;
// True when an RS must be inserted before the next data.
Boolean insertRS_;
Decoder *decoder_;
// Undecoded bytes left over from the previous decode call.
const char *leftOver_;
size_t nLeftOver_;
Boolean mayRewind_;
Boolean maySetDocCharset_;
Boolean mayNotExist_;
enum RecordType {
// Line terminator convention not yet detected.
unknown,
// A CR was the last buffered Char; the next Char decides cr vs. crlf.
crUnknown,
crlf,
lf,
cr,
// No record processing.
asis
};
RecordType recordType_;
Boolean zapEof_;
Boolean internalCharsetIsDocCharset_;
Char replacementChar_;
// Character translation table used by MappingDecoder; null when unused.
Ptr<CharMapResource<Unsigned32> > map_;
};
// Parser for system identifier strings.  A string that starts with '<'
// followed by a recognized storage manager type (or CATALOG) is parsed as a
// sequence of tagged storage object specifications; anything else is handed
// to handleInformal().
class FSIParser {
public:
FSIParser(const StringC &, const CharsetInfo &idCharset,
Boolean isNdata,
const StorageObjectLocation *defLoc,
const EntityManagerImpl *em,
Messenger &mgr);
// Appends the storage object specs parsed from the string to parsedSysid.
Boolean parse(ParsedSystemId &parsedSysid);
static const char *recordsName(StorageObjectSpec::Records records);
// Entry of the name/value table for the records attribute.
struct RecordType {
const char *name;
StorageObjectSpec::Records value;
};
private:
Boolean handleInformal(size_t startIndex, ParsedSystemId &parsedSysid);
Boolean convertId(StringC &, Xchar smcrd, const StorageManager *);
// Returns the next character of the string, or -1 at the end.
Xchar get();
// Steps back one character (no-op at the start of the string).
void unget();
StorageManager *lookupStorageType(const StringC &key, Boolean &neutral);
// Case-insensitive comparison against a C string.
Boolean matchKey(const StringC &, const char *);
// True if the character equals the given execution character as
// represented in idCharset_.
Boolean matchChar(Xchar, char);
Boolean isS(Xchar);
Boolean convertDigit(Xchar c, int &weight);
void uncharref(StringC &);
Boolean setAttributes(StorageObjectSpec &sos, Boolean neutral,
Xchar &smcrd, Boolean &fold);
Boolean setCatalogAttributes(ParsedSystemId &parsedSysid);
void setDefaults(StorageObjectSpec &sos);
Boolean parseAttribute(StringC &token, Boolean &gotValue, StringC &value);
Boolean lookupRecords(const StringC &token, StorageObjectSpec::Records &);
void convertMinimumLiteral(const StringC &from, StringC &to);
// The string being parsed and the current read position in it.
const StringC &str_;
size_t strIndex_;
Messenger &mgr_;
const EntityManagerImpl *em_;
// Spec and actual id of the defining location, or 0 when there is none.
const StorageObjectSpec *defSpec_;
const StringC *defId_;
const CharsetInfo &idCharset_;
Boolean isNdata_;
static RecordType recordTypeTable[];
};
// Record start and record end characters written into the input buffer by
// ExternalInputSource::fill() when it normalizes line boundaries.
const Char RS = '\n';
const Char RE = '\r';
// Out-of-line definition of the CatalogManager destructor; it has nothing
// to release itself.
ExtendEntityManager::CatalogManager::~CatalogManager()
{
}
// Factory for the concrete entity manager: forwards all four arguments to
// the EntityManagerImpl constructor and returns the newly allocated object.
ExtendEntityManager *ExtendEntityManager::make(StorageManager *sm,
                                               const InputCodingSystem *cs,
                                               const ConstPtr<InputCodingSystemKit> &csKit,
                                               Boolean internalCharsetIsDocCharset)
{
  ExtendEntityManager *manager
    = new EntityManagerImpl(sm, cs, csKit, internalCharsetIsDocCharset);
  return manager;
}
// Converts an offset within the input described by info into a storage
// object location.  Returns false when info is null, when it is not an
// ExternalInfoImpl, or when the conversion itself fails.
Boolean ExtendEntityManager::externalize(const ExternalInfo *info,
                                         Offset off,
                                         StorageObjectLocation &loc)
{
  if (info) {
    const ExternalInfoImpl *impl = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info);
    if (impl)
      return impl->convertOffset(off, loc);
  }
  return false;
}
// Returns the parsed system identifier stored in info, or 0 when info is
// null or is not an ExternalInfoImpl.
const ParsedSystemId *
ExtendEntityManager::externalInfoParsedSystemId(const ExternalInfo *info)
{
  if (info) {
    const ExternalInfoImpl *impl = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info);
    if (impl)
      return &impl->parsedSystemId();
  }
  return 0;
}
// Stores the default storage manager, default coding system, coding system
// kit and the internal-charset flag.  The initializers are listed in member
// declaration order, which is the order they run in anyway.
EntityManagerImpl::EntityManagerImpl(StorageManager *defaultStorageManager,
                                     const InputCodingSystem *defaultCodingSystem,
                                     const ConstPtr<InputCodingSystemKit> &codingSystemKit,
                                     Boolean internalCharsetIsDocCharset)
  : defaultStorageManager_(defaultStorageManager),
    defaultCodingSystem_(defaultCodingSystem),
    internalCharsetIsDocCharset_(internalCharsetIsDocCharset),
    codingSystemKit_(codingSystemKit)
{
}
// Accessor for the flag passed to the constructor.
Boolean EntityManagerImpl::internalCharsetIsDocCharset() const
{
  const Boolean flag = internalCharsetIsDocCharset_;
  return flag;
}
// Returns the system character set reported by the coding system kit.
const CharsetInfo &EntityManagerImpl::charset() const
{
  const CharsetInfo &systemCharset = codingSystemKit_->systemCharset();
  return systemCharset;
}
// Opens the entity named by sysid.  The identifier is parsed first and then
// passed through the catalog manager; if either step fails the result is 0.
// Otherwise a new ExternalInputSource reading the parsed identifier is
// returned.
InputSource *EntityManagerImpl::open(const StringC &sysid,
                                     const CharsetInfo &docCharset,
                                     InputSourceOrigin *origin,
                                     unsigned flags,
                                     Messenger &mgr)
{
  ParsedSystemId parsedSysid;
  const Boolean ndata = (flags & ExtendEntityManager::isNdata) != 0;
  if (!parseSystemId(sysid, docCharset, ndata, 0, mgr, parsedSysid))
    return 0;
  if (!catalogManager_->mapCatalog(parsedSysid, this, mgr))
    return 0;
  return new ExternalInputSource(parsedSysid,
                                 charset(),
                                 docCharset,
                                 internalCharsetIsDocCharset_,
                                 codingSystemKit_->replacementChar(),
                                 origin, flags);
}
// Delegates catalog construction to the catalog manager, passing this
// entity manager along.
ConstPtr<EntityCatalog>
EntityManagerImpl::makeCatalog(StringC &systemId,
                               const CharsetInfo &docCharset,
                               Messenger &mgr)
{
  ConstPtr<EntityCatalog> catalog
    = catalogManager_->makeCatalog(systemId, docCharset, this, mgr);
  return catalog;
}
// Parses every system identifier in sysids into one ParsedSystemId and
// writes its unparsed form to result.  When mapCatalogDocument is set, a
// catalogDocument map entry is added before parsing.  Returns 0 as soon as
// one identifier fails to parse, 1 otherwise.
Boolean
EntityManagerImpl::mergeSystemIds(const Vector<StringC> &sysids,
                                  Boolean mapCatalogDocument,
                                  const CharsetInfo &docCharset,
                                  Messenger &mgr,
                                  StringC &result) const
{
  ParsedSystemId merged;
  if (mapCatalogDocument) {
    merged.maps.resize(merged.maps.size() + 1);
    merged.maps.back().type = ParsedSystemId::Map::catalogDocument;
  }
  size_t idx = 0;
  while (idx < sysids.size()) {
    if (!parseSystemId(sysids[idx], docCharset, 0, 0, mgr, merged))
      return 0;
    idx++;
  }
  merged.unparse(internalCharset(docCharset), 0, result);
  return 1;
}
// Parses str relative to the storage object location derived from defLoc
// (if one can be derived) and writes the unparsed result to result.  When
// mapCatalogPublic is non-null, a catalogPublic map carrying that public
// identifier is inserted at the front of the maps.  Returns 0 if parsing
// fails, 1 otherwise.
Boolean
EntityManagerImpl::expandSystemId(const StringC &str,
                                  const Location &defLoc,
                                  Boolean LOCALisNdata,
                                  const CharsetInfo &docCharset,
                                  const StringC *mapCatalogPublic,
                                  Messenger &mgr,
                                  StringC &result)
{
  ParsedSystemId parsedSysid;
  StorageObjectLocation defSoLoc;
  // Pass a base location only when one could be determined.
  const StorageObjectLocation *base
    = defLocation(defLoc, defSoLoc) ? &defSoLoc : 0;
  if (!parseSystemId(str, docCharset, LOCALisNdata, base, mgr, parsedSysid))
    return 0;
  if (mapCatalogPublic) {
    ParsedSystemId::Map publicMap;
    publicMap.type = ParsedSystemId::Map::catalogPublic;
    publicMap.publicId = *mapCatalogPublic;
    parsedSysid.maps.insert(parsedSysid.maps.begin(), 1, publicMap);
  }
  parsedSysid.unparse(internalCharset(docCharset), LOCALisNdata, result);
  return 1;
}
// Runs an FSIParser over str, using the internal character set selected for
// docCharset, and appends the outcome to parsedSysid.
Boolean EntityManagerImpl::parseSystemId(const StringC &str,
                                         const CharsetInfo &docCharset,
                                         Boolean LOCALisNdata,
                                         const StorageObjectLocation *defLoc,
                                         Messenger &mgr,
                                         ParsedSystemId &parsedSysid) const
{
  const CharsetInfo &idCharset = internalCharset(docCharset);
  FSIParser parser(str, idCharset, LOCALisNdata, defLoc, this, mgr);
  const Boolean ok = parser.parse(parsedSysid);
  return ok;
}
// Asks each registered storage manager, then the default one, whether the
// string looks like one of its identifiers.  Returns the first that says
// yes, or 0 if none does.
StorageManager *
EntityManagerImpl::guessStorageType(const StringC &type,
                                    const CharsetInfo &internalCharset) const
{
  const size_t nManagers = storageManagers_.size();
  for (size_t k = 0; k < nManagers; k++) {
    StorageManager *candidate = storageManagers_[k].pointer();
    if (candidate->guessIsId(type, internalCharset))
      return candidate;
  }
  StorageManager *fallback = defaultStorageManager_.pointer();
  if (fallback->guessIsId(type, internalCharset))
    return fallback;
  return 0;
}
// Finds the storage manager whose type name equals type, ignoring case.
// The default storage manager is checked before the registered ones.
// Returns 0 for an empty name or when nothing matches.
StorageManager *
EntityManagerImpl::lookupStorageType(const StringC &type,
                                     const CharsetInfo &internalCharset) const
{
  if (type.size() == 0)
    return 0;
  StorageManager *dflt = defaultStorageManager_.pointer();
  if (matchKey(type, dflt->type(), internalCharset))
    return dflt;
  const size_t nManagers = storageManagers_.size();
  for (size_t k = 0; k < nManagers; k++) {
    StorageManager *candidate = storageManagers_[k].pointer();
    if (matchKey(type, candidate->type(), internalCharset))
      return candidate;
  }
  return 0;
}
// Finds the storage manager whose type() returns this very pointer.  The
// comparison is by pointer identity, not by string contents.  The default
// storage manager is checked before the registered ones; returns 0 when
// nothing matches.
StorageManager *
EntityManagerImpl::lookupStorageType(const char *type) const
{
  StorageManager *dflt = defaultStorageManager_.pointer();
  if (type == dflt->type())
    return dflt;
  const size_t nManagers = storageManagers_.size();
  for (size_t k = 0; k < nManagers; k++) {
    StorageManager *candidate = storageManagers_[k].pointer();
    if (type == candidate->type())
      return candidate;
  }
  return 0;
}
// Forwards the request to the coding system kit's makeInputCodingSystem().
const InputCodingSystem *
EntityManagerImpl::lookupCodingSystem(const StringC &type,
                                      const CharsetInfo &internalCharset,
                                      Boolean isBctf,
                                      const char *&name) const
{
  const InputCodingSystem *codingSystem
    = codingSystemKit_->makeInputCodingSystem(type, internalCharset, isBctf, name);
  return codingSystem;
}
// Case-insensitive comparison of type against the C string s.
//
// Each character of s is converted to upper and to lower case, mapped into
// internalCharset with execToDesc(), and compared with the corresponding
// character of type; either form may match.  Returns false when the lengths
// differ or when any character matches neither case form.
//
// The character is passed to toupper()/tolower() as an unsigned char:
// calling them with a negative plain char value is undefined behavior.
Boolean
EntityManagerImpl::matchKey(const StringC &type,
                            const char *s,
                            const CharsetInfo &internalCharset)
{
  if (strlen(s) != type.size())
    return false;
  for (size_t i = 0; i < type.size(); i++) {
    const unsigned char ch = (unsigned char)s[i];
    if (internalCharset.execToDesc(char(toupper(ch))) != type[i]
        && internalCharset.execToDesc(char(tolower(ch))) != type[i])
      return false;
  }
  return true;
}
// Appends sm to the list of registered storage managers; the new slot is an
// Owner<StorageManager> assigned from sm.
void EntityManagerImpl::registerStorageManager(StorageManager *sm)
{
  const size_t slot = storageManagers_.size();
  storageManagers_.resize(slot + 1);
  storageManagers_.back() = sm;
}
// Installs the catalog manager used by open() and makeCatalog().
void EntityManagerImpl::setCatalogManager(CatalogManager *catalogManager)
{
  catalogManager_ = catalogManager;
}
// Resolves a Location to a storage object location.
//
// Starting at the location's origin, the loop looks for an
// InputSourceOrigin that carries ExternalInfo.  Origins that are not input
// source origins are skipped by moving to their parent location.  An input
// source origin without ExternalInfo is asked for its own defining location.
// Returns 0 when the chain runs out or that request fails; otherwise the
// result of externalizing the offset that was found.
Boolean
EntityManagerImpl::defLocation(const Location &defLocation,
                               StorageObjectLocation &soLoc)
{
  const Origin *origin = defLocation.origin().pointer();
  Index index = defLocation.index();
  Offset off;
  const ExternalInfo *info;
  for (;;) {
    if (!origin)
      return 0;
    const InputSourceOrigin *iso = origin->asInputSourceOrigin();
    if (!iso) {
      // Not an input source origin: continue from the parent location.
      const Location &parentLoc = origin->parent();
      origin = parentLoc.origin().pointer();
      index = parentLoc.index();
      continue;
    }
    off = iso->startOffset(index);
    info = iso->externalInfo();
    if (info)
      break;
    if (!iso->defLocation(off, origin, index))
      return 0;
  }
  return ExtendEntityManager::externalize(info, off, soLoc);
}
// StorageObject wrapper that, while *unbuffer is true, hands data to the
// caller one byte at a time.  It reads a whole block from the wrapped
// object into a private buffer and returns a single byte per read() call.
// Once *unbuffer becomes false and the private buffer is drained, reads are
// passed straight through to the wrapped object.
//
// The wrapper owns the wrapped StorageObject (through Owner) and its own
// byte buffer.
class UnbufferingStorageObject : public StorageObject {
public:
  // sub is the object to wrap; unbuffer points at the flag that is
  // consulted on every refill.
  UnbufferingStorageObject(StorageObject *sub,
                           const Boolean *unbuffer)
    : sub_(sub), bufSize_(0), bufAvail_(0), bufNext_(0), buf_(0),
      unbuffer_(unbuffer) { }
  ~UnbufferingStorageObject() { delete [] buf_; }
  // Reads into buf.  Returns 0 on failure of the wrapped read, otherwise 1
  // with nread set to the number of bytes stored in buf.
  Boolean read(char *buf, size_t bufSize, Messenger &mgr,
               size_t &nread) {
    if (bufNext_ >= bufAvail_) {
      bufAvail_ = bufNext_ = 0;
      if (!*unbuffer_)
        return sub_->read(buf, bufSize, mgr, nread);
      if (buf_ == 0)
        buf_ = new char[bufSize_ = bufSize];
      if (!sub_->read(buf_, bufSize_, mgr, bufAvail_)) {
        // Do not let a count written by a failed read look like buffered data.
        bufAvail_ = 0;
        return 0;
      }
      if (bufAvail_ == 0) {
        // A successful read of zero bytes: report it as such instead of
        // returning an uninitialized byte from buf_.
        nread = 0;
        return 1;
      }
    }
    *buf = buf_[bufNext_++];
    nread = 1;
    return 1;
  }
  // Discards any privately buffered bytes and rewinds the wrapped object.
  Boolean rewind(Messenger &mgr) {
    bufAvail_ = bufNext_ = 0;
    return sub_->rewind(mgr);
  }
  void willNotRewind() { sub_->willNotRewind(); }
  size_t getBlockSize() const { return sub_->getBlockSize(); }
private:
  // buf_ is a raw owning pointer, so copying must not be possible.
  UnbufferingStorageObject(const UnbufferingStorageObject &); // undefined
  void operator=(const UnbufferingStorageObject &); // undefined
  Owner<StorageObject> sub_;
  size_t bufSize_;   // capacity of buf_
  size_t bufAvail_;  // number of valid bytes in buf_
  size_t bufNext_;   // index of the next byte to hand out
  char *buf_;
  const Boolean *unbuffer_;
};
// Decoder that runs another decoder and then translates every decoded
// character through a CharMap (see decode() for the entry format).
class MappingDecoder : public Decoder {
public:
MappingDecoder(Decoder *,
const ConstPtr<CharMapResource<Unsigned32> > &);
Boolean convertOffset(unsigned long &offset) const;
size_t decode(Char *, const char *, size_t, const char **);
private:
// The wrapped decoder (held through Owner).
Owner<Decoder> sub_;
// Translation table applied to each decoded character.
ConstPtr<CharMapResource<Unsigned32> > map_;
};
// Wraps sub and remembers the translation map.  The base Decoder is given
// sub's minimum bytes-per-character.
MappingDecoder::MappingDecoder(Decoder *sub,
                               const ConstPtr<CharMapResource<Unsigned32> > &map)
  : Decoder(sub->minBytesPerChar()),
    sub_(sub),
    map_(map)
{
}
// Decodes with the wrapped decoder and then translates each resulting
// character in place.  A map entry with bit 31 set holds an absolute
// replacement character in its remaining bits; any other entry is a delta
// that is added to the character.  Returns the number of characters
// produced by the wrapped decoder.
size_t MappingDecoder::decode(Char *to, const char *s,
                              size_t slen, const char **rest)
{
  const size_t nDecoded = sub_->decode(to, s, slen, rest);
  const CharMap<Unsigned32> &table = *map_;
  Char *const lim = to + nDecoded;
  for (Char *p = to; p != lim; ++p) {
    const Unsigned32 entry = table[*p];
    if (entry & (unsigned(1) << 31))
      *p = (entry & ~(unsigned(1) << 31));
    else
      *p += entry;
  }
  return nDecoded;
}
// Offset conversion is delegated unchanged to the wrapped decoder.
Boolean MappingDecoder::convertOffset(unsigned long &offset) const
{
  const Boolean converted = sub_->convertOffset(offset);
  return converted;
}
// Sets up an input source over the storage objects of parsedSysid.
//
// A character translation map is built if at least one storage object has a
// coding system type that is neither `special` nor the type that needs no
// translation (bctf when the internal charset is the document charset,
// encoding otherwise).  All storage object slots start out empty; the
// objects are created on demand by fill().  The ExternalInfoImpl created
// here takes over the contents of parsedSysid and is installed on origin.
//
// The member initializers are listed in declaration order, which is the
// order they run in anyway.
ExternalInputSource::ExternalInputSource(ParsedSystemId &parsedSysid,
                                         const CharsetInfo &systemCharset,
                                         const CharsetInfo &docCharset,
                                         Boolean internalCharsetIsDocCharset,
                                         Char replacementChar,
                                         InputSourceOrigin *origin,
                                         unsigned flags)
  : InputSource(origin, 0, 0),
    sov_(parsedSysid.size()),
    mayRewind_((flags & EntityManager::mayRewind) != 0),
    // hack
    maySetDocCharset_((flags & EntityManager::maySetDocCharset) != 0),
    mayNotExist_((flags & ExtendEntityManager::mayNotExist) != 0),
    internalCharsetIsDocCharset_(internalCharsetIsDocCharset),
    replacementChar_(replacementChar)
{
  size_t i;
  for (i = 0; i < parsedSysid.size(); i++) {
    if (parsedSysid[i].codingSystemType == StorageObjectSpec::special)
      continue;
    if (parsedSysid[i].codingSystemType
        == (internalCharsetIsDocCharset
            ? StorageObjectSpec::bctf
            : StorageObjectSpec::encoding))
      continue;
    // This storage object needs translation: build the map once.
    map_ = new CharMapResource<Unsigned32>;
    buildMap(systemCharset, docCharset);
    break;
  }
  for (i = 0; i < sov_.size(); i++)
    sov_[i] = 0;
  init();
  info_ = new ExternalInfoImpl(parsedSysid);
  origin->setExternalInfo(info_);
}
void ExternalInputSource::setDocCharset(const CharsetInfo &docCharset,
const CharsetInfo &systemCharset)
{
if (!map_.isNull())
buildMap(systemCharset, docCharset);
willNotSetDocCharset();
}
// Clears maySetDocCharset_.  UnbufferingStorageObject watches this flag
// through a pointer and stops byte-at-a-time reading once it is false.
void ExternalInputSource::willNotSetDocCharset()
{
  maySetDocCharset_ = 0;
}
void ExternalInputSource::buildMap(const CharsetInfo &systemCharset,
const CharsetInfo &docCharset)
{
CharMap<Unsigned32> &map = *map_;
// FIXME How should invalidChar be chosen when internalCharsetIsDocCharset_?
Char invalidChar
= internalCharsetIsDocCharset_ ? 0 : replacementChar_;
map.setAll((Unsigned32(1) << 31) | invalidChar);
if (internalCharsetIsDocCharset_)
buildMap1(systemCharset, docCharset);
else
buildMap1(docCharset, systemCharset);
}
// Walks the description of fromCharset range by range and, for each piece
// that toCharset can represent, stores in map_ the delta that converts a
// fromCharset code into the toCharset code.
void ExternalInputSource::buildMap1(const CharsetInfo &fromCharset,
const CharsetInfo &toCharset)
{
UnivCharsetDescIter iter(fromCharset.desc());
for (;;) {
WideChar descMin, descMax;
UnivChar univMin;
if (!iter.next(descMin, descMax, univMin))
break;
// Ranges entirely above charMax are of no interest; stop there.
if (descMin > charMax)
break;
// Clip the range to charMax.
if (descMax > charMax)
descMax = charMax;
WideChar totalCount = 1 + (descMax - descMin);
// Consume the range in pieces of `count` characters, as reported by
// univToDesc for the current universal code.
do {
WideChar count;
WideChar toMin;
ISet<WideChar> set;
int nMap = toCharset.univToDesc(univMin, toMin, set, count);
if (count > totalCount)
count = totalCount;
if (nMap && toMin <= charMax) {
// Clip the target end of the piece to charMax as well.
Char toMax;
if (count - 1 > charMax - toMin)
toMax = charMax;
else
toMax = toMin + (count - 1);
// The stored value is a delta (bit 31 clear) added by MappingDecoder.
map_->setRange(descMin, descMin + (toMax - toMin), Char(toMin - descMin));
}
descMin += count;
univMin += count;
totalCount -= count;
} while (totalCount > 0);
}
}
// Puts the reader into its initial state: no buffer, no current storage
// object, no left-over bytes, and an RS pending before the first data.
void ExternalInputSource::init()
{
  // Storage object cursor.
  so_ = 0;
  soIndex_ = 0;
  // Buffer bookkeeping.
  buf_ = 0;
  bufSize_ = 0;
  bufLim_ = 0;
  bufLimOffset_ = 0;
  // Undecoded byte carry-over.
  leftOver_ = 0;
  nLeftOver_ = 0;
  // The first record start still has to be inserted.
  insertRS_ = true;
}
// Frees the character buffer.  delete [] on a null pointer does nothing, so
// no explicit null test is needed.
ExternalInputSource::~ExternalInputSource()
{
  delete [] buf_;
}
// Rewinds the input source to the start of its first storage object.
//
// The buffer is released, a fresh ExternalInfoImpl is created from a copy
// of the current parsed system id, every storage object opened so far is
// rewound, and the ids recorded in the old info are carried over to the new
// one.  Returns 0 if any storage object refuses to rewind, 1 on success.
Boolean ExternalInputSource::rewind(Messenger &mgr)
{
  reset(0, 0);
  // Release the buffer and clear every member that refers to it right away.
  // The early return below would otherwise leave buf_ dangling, and the
  // destructor would then delete it a second time.
  delete [] buf_;
  buf_ = 0;
  bufSize_ = 0;
  bufLim_ = 0;
  leftOver_ = 0;
  nLeftOver_ = 0;
  // reset makes a new EntityOrigin
  ParsedSystemId parsedSysid(info_->parsedSystemId());
  ExternalInfoImpl *oldInfo = info_;
  info_ = new ExternalInfoImpl(parsedSysid);
  so_ = 0;
  for (size_t i = 0; i < soIndex_; i++) {
    if (sov_[i] && !sov_[i]->rewind(mgr))
      return 0;
    // Carry the actual storage object id over to the new info.
    StringC tem;
    oldInfo->getId(i, tem);
    info_->setId(i, tem);
  }
  inputSourceOrigin()->setExternalInfo(info_);
  init();
  return 1;
}
// Tells every storage object created so far that it will not be rewound,
// and clears mayRewind_ so that objects created later are told the same.
void ExternalInputSource::willNotRewind()
{
  const size_t nObjects = sov_.size();
  for (size_t k = 0; k < nObjects; k++) {
    if (!sov_[k])
      continue;
    sov_[k]->willNotRewind();
  }
  mayRewind_ = 0;
}
// Round N up to the nearest multiple of TO.
// TO must be a power of 2 (the result is computed by masking).
inline
size_t roundUp(size_t n, size_t to)
{
  const size_t mask = to - 1;
  return (n + mask) & ~mask;
}
// Records a record start at buffer position p.  The offset is derived from
// bufLimOffset_, the offset that corresponds to bufLim_.
inline
void ExternalInputSource::noteRSAt(const Char *p)
{
  info_->noteRS(bufLimOffset_ - (bufLim_ - p));
}
inline
void ExternalInputSource::noteRS()
{
noteRSAt(cur());
}
// Makes more characters available to the InputSource.
//
// If all decoded characters up to bufLim_ have been consumed, more bytes
// are read from the current storage object (opening the next one as
// needed) and decoded into the buffer.  end() is then advanced over the
// next stretch of decoded characters, stopping at record boundaries, which
// are normalized according to recordType_.  Returns eE when every storage
// object is exhausted, otherwise the next character.
Xchar ExternalInputSource::fill(Messenger &mgr)
{
ASSERT(cur() == end());
while (end() >= bufLim_) {
// need more data
while (so_ == 0) {
// No open storage object: open the next one, or report end of entity.
if (soIndex_ >= sov_.size())
return eE;
if (soIndex_ > 0)
info_->noteStorageObjectEnd(bufLimOffset_ - (bufLim_ - end()));
const StorageObjectSpec &spec = info_->spec(soIndex_);
if (!sov_[soIndex_]) {
StringC id;
// When the object is allowed not to exist, creation errors are sent to
// a NullMessenger instead of mgr.
if (mayNotExist_) {
NullMessenger nullMgr;
sov_[soIndex_]
= spec.storageManager->makeStorageObject(spec.specId, spec.baseId,
spec.search,
mayRewind_, nullMgr, id);
}
else
sov_[soIndex_]
= spec.storageManager->makeStorageObject(spec.specId, spec.baseId,
spec.search,
mayRewind_, mgr, id);
info_->setId(soIndex_, id);
}
so_ = sov_[soIndex_].pointer();
if (so_) {
decoder_ = spec.codingSystem->makeDecoder();
// Coding system types other than `special` and the one that needs no
// translation get their output mapped through map_.
if (spec.codingSystemType != StorageObjectSpec::special
&& spec.codingSystemType != (internalCharsetIsDocCharset_
? StorageObjectSpec::bctf
: StorageObjectSpec::encoding)) {
decoder_ = new MappingDecoder(decoder_, map_);
// While the document charset may still change, read byte by byte so
// that nothing is decoded ahead with a map that may be rebuilt.
if (maySetDocCharset_) {
sov_[soIndex_] = new UnbufferingStorageObject(sov_[soIndex_].extract(), &maySetDocCharset_);
so_ = sov_[soIndex_].pointer();
}
}
info_->setDecoder(soIndex_, decoder_);
zapEof_ = spec.zapEof;
switch (spec.records) {
case StorageObjectSpec::asis:
recordType_ = asis;
insertRS_ = false;
break;
case StorageObjectSpec::cr:
recordType_ = cr;
break;
case StorageObjectSpec::lf:
recordType_ = lf;
break;
case StorageObjectSpec::crlf:
recordType_ = crlf;
break;
case StorageObjectSpec::find:
recordType_ = unknown;
break;
default:
CANNOT_HAPPEN();
}
soIndex_++;
readSize_ = so_->getBlockSize();
nLeftOver_ = 0;
break;
}
else
setAccessError();
// The storage object could not be created: skip to the next one.
soIndex_++;
}
// Number of already buffered Chars between start() and end() to preserve.
size_t keepSize = end() - start();
const size_t align = sizeof(int)/sizeof(Char);
size_t readSizeChars = (readSize_ + (sizeof(Char) - 1))/sizeof(Char);
readSizeChars = roundUp(readSizeChars, align);
size_t neededSize; // in Chars
size_t startOffset;
// compute neededSize and readSize
unsigned minBytesPerChar = decoder_->minBytesPerChar();
if (nLeftOver_ == 0 && minBytesPerChar >= sizeof(Char)) {
// In this case we want to do decoding in place.
// FIXME It might be a win on some systems (Irix?) to arrange that the
// read buffer is on a page boundary.
if (keepSize >= size_t(-1)/sizeof(Char) - (align - 1) - insertRS_)
abort(); // FIXME throw an exception
// Now size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize > 0
if (readSizeChars
> size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize)
abort();
neededSize = roundUp(readSizeChars + keepSize + insertRS_, align);
// Kept data is placed so that it ends directly before the read area at
// the tail of the buffer (leaving room for an inserted RS).
startOffset = ((neededSize > bufSize_ ? neededSize : bufSize_)
- readSizeChars - insertRS_ - keepSize);
}
else {
// Needs to be room for everything before decoding.
neededSize = (keepSize + insertRS_ + readSizeChars
+ (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char));
// Also must be room for everything after decoding.
size_t neededSize2
= (keepSize + insertRS_
// all the converted characters
+ (nLeftOver_ + readSize_)/minBytesPerChar
// enough Chars to contain left over bytes
+ ((readSize_ % minBytesPerChar + sizeof(Char) - 1)
/ sizeof(Char)));
if (neededSize2 > neededSize)
neededSize = neededSize2;
neededSize = roundUp(neededSize, align);
if (neededSize > size_t(-1)/sizeof(Char))
abort();
startOffset = 0;
}
if (bufSize_ < neededSize)
reallocateBuffer(neededSize);
// Move the kept Chars to their new position in the buffer.
Char *newStart = buf_ + startOffset;
if (newStart != start() && keepSize > 0)
memmove(newStart, start(), keepSize*sizeof(Char));
// Raw bytes are read into the last readSizeChars Chars of the buffer;
// left-over bytes from the previous round go immediately before them.
char *bytesStart = (char *)(buf_ + bufSize_ - readSizeChars) - nLeftOver_;
if (nLeftOver_ > 0 && leftOver_ != bytesStart)
memmove(bytesStart, leftOver_, nLeftOver_);
moveStart(newStart);
bufLim_ = end();
size_t nread;
if (so_->read((char *)(buf_ + bufSize_ - readSizeChars), readSize_,
mgr, nread)) {
if (nread > 0) {
const char *bytesEnd = bytesStart + nLeftOver_ + nread;
// Decode after end(), leaving one Char free for a pending RS.  A
// trailing EOFCHAR is left out of the decode when zapEof_ is set.
size_t nChars = decoder_->decode((Char *)end() + insertRS_,
bytesStart,
nLeftOver_ + nread
- (zapEof_ && bytesEnd[-1] == EOFCHAR),
&leftOver_);
nLeftOver_ = bytesEnd - leftOver_;
if (nChars > 0) {
if (insertRS_) {
// Put the pending RS into the slot reserved in front of the data.
noteRS();
*(Char *)end() = RS;
advanceEnd(end() + 1);
insertRS_ = false;
bufLim_ += 1;
bufLimOffset_ += 1;
}
bufLim_ += nChars;
bufLimOffset_ += nChars;
break;
}
}
}
else
// Read failed or hit the end: move on to the next storage object.
so_ = 0;
}
ASSERT(end() < bufLim_);
// Decoded data is available but an RS is still pending: insert it now.
if (insertRS_) {
noteRS();
insertChar(RS);
insertRS_ = false;
bufLimOffset_ += 1;
}
// Advance end() up to and including the next record boundary.
switch (recordType_) {
case unknown:
{
// The first line terminator seen decides the record type.
const Char *e = findNextCrOrLf(end(), bufLim_);
if (e) {
if (*e == '\n') {
recordType_ = lf;
info_->noteInsertedRSs();
// The LF becomes the RE; an RS will be inserted after it.
*(Char *)e = RE;
advanceEnd(e + 1);
insertRS_ = true;
}
else {
if (e + 1 < bufLim_) {
if (e[1] == '\n') {
recordType_ = crlf;
advanceEnd(e + 1);
// An LF that is the last buffered Char is dropped and replaced
// by a pending RS.
if (e + 2 == bufLim_) {
bufLim_--;
bufLimOffset_--;
insertRS_ = true;
}
}
else {
advanceEnd(e + 1);
recordType_ = cr;
info_->noteInsertedRSs();
insertRS_ = true;
}
}
else {
// CR is the last buffered Char: cannot tell cr from crlf yet.
recordType_ = crUnknown;
advanceEnd(e + 1);
}
}
}
else
advanceEnd(bufLim_);
}
break;
case crUnknown:
{
// The Char following the earlier CR settles the record type.
if (*cur() == '\n') {
noteRS();
advanceEnd(cur() + 1);
recordType_ = crlf;
}
else {
advanceEnd(cur() + 1);
insertRS_ = true;
recordType_ = cr;
info_->noteInsertedRSs();
}
}
break;
case lf:
{
// Each LF is rewritten as RE and followed by an inserted RS.
Char *e = (Char *)findNextLf(end(), bufLim_);
if (e) {
advanceEnd(e + 1);
*e = RE;
insertRS_ = true;
}
else
advanceEnd(bufLim_);
}
break;
case cr:
{
// Each CR already is an RE; an RS is inserted after it.
const Char *e = findNextCr(end(), bufLim_);
if (e) {
advanceEnd(e + 1);
insertRS_ = true;
}
else
advanceEnd(bufLim_);
}
break;
case crlf:
{
// The LFs stay in place; their positions are noted as record starts.
const Char *e = end();
for (;;) {
e = findNextLf(e, bufLim_);
if (!e) {
advanceEnd(bufLim_);
break;
}
// Need to delete final RS if not followed by anything.
if (e + 1 == bufLim_) {
bufLim_--;
bufLimOffset_--;
advanceEnd(e);
insertRS_ = true;
// Nothing but the dropped LF was available: fetch more data.
if (cur() == end())
return fill(mgr);
break;
}
noteRSAt(e);
e++;
}
}
break;
case asis:
advanceEnd(bufLim_);
break;
default:
CANNOT_HAPPEN();
}
ASSERT(cur() < end());
return nextChar();
}
// Returns a pointer to the first '\r' in [start, end), or 0 if there is none.
const Char *ExternalInputSource::findNextCr(const Char *start,
                                            const Char *end)
{
  const Char *p = start;
  while (p < end) {
    if (*p == '\r')
      return p;
    p++;
  }
  return 0;
}
// Returns a pointer to the first '\n' in [start, end), or 0 if there is none.
const Char *ExternalInputSource::findNextLf(const Char *start,
                                            const Char *end)
{
  const Char *p = start;
  while (p < end) {
    if (*p == '\n')
      return p;
    p++;
  }
  return 0;
}
// Returns a pointer to the first '\n' or '\r' in [start, end), or 0 if
// there is none.
const Char *ExternalInputSource::findNextCrOrLf(const Char *start,
                                                const Char *end)
{
  const Char *p = start;
  while (p < end) {
    const Char c = *p;
    if (c == '\n' || c == '\r')
      return p;
    p++;
  }
  return 0;
}
// Records a named character reference at the current position and inserts
// the referenced character there.  May only be called when cur() is at
// start().
void ExternalInputSource::pushCharRef(Char ch, const NamedCharRef &ref)
{
  ASSERT(cur() == start());
  noteCharRef(startIndex() + (cur() - start()), ref);
  insertChar(ch);
}
// Inserts ch at the current read position.
//
// If there is free space in front of start(), the Chars between start() and
// cur() are shifted one position left and ch goes into the freed slot.
// Otherwise the Chars from cur() to bufLim_ are shifted one position right
// (growing the buffer or relocating left-over bytes first if they are in
// the way) and ch is stored at cur().
void ExternalInputSource::insertChar(Char ch)
{
if (start() > buf_) {
if (cur() > start())
memmove((Char *)start() - 1, start(), (cur() - start())*sizeof(Char));
moveLeft();
*(Char *)cur() = ch;
}
else {
// must have start == buf
// The decoded data reaches right up to the left-over bytes at the tail
// of the buffer (or to the buffer end): one more Char of room is needed.
if (buf_ + (bufSize_ - (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char))
== bufLim_) {
if (bufSize_ == size_t(-1))
abort(); // FIXME throw an exception
reallocateBuffer(bufSize_ + 1);
}
// There is room, but the left-over bytes would be overwritten by the
// shift: move them to the very end of the buffer first.
else if (nLeftOver_ > 0 && ((char *)(bufLim_ + 1) > leftOver_)) {
char *s = (char *)(buf_ + bufSize_) - nLeftOver_;
memmove(s, leftOver_, nLeftOver_);
leftOver_ = s;
}
if (cur() < bufLim_)
memmove((Char *)cur() + 1, cur(), (bufLim_ - cur())*sizeof(Char));
*(Char *)cur() = ch;
advanceEnd(end() + 1);
bufLim_ += 1;
}
}
// Replaces the buffer with a new one of newSize Chars.
//
// The old contents are copied, the InputSource pointers are re-based with
// changeBuffer(), bufLim_ is re-based, and any left-over undecoded bytes are
// moved to the very end of the new buffer.  newSize is expected to be at
// least the current bufSize_.
void ExternalInputSource::reallocateBuffer(size_t newSize)
{
  Char *newBuf = new Char[newSize];
  // On the first allocation buf_ is null and bufSize_ is 0.  memcpy must
  // not be handed a null source pointer even for a zero length, so skip it.
  if (buf_ && bufSize_ > 0)
    memcpy(newBuf, buf_, bufSize_*sizeof(Char));
  bufSize_ = newSize;
  changeBuffer(newBuf, buf_);
  bufLim_ = newBuf + (bufLim_ - buf_);
  if (nLeftOver_ > 0) {
    // leftOver_ still points into the old buffer; the same bytes now live
    // at the same byte offset in newBuf.
    char *s = (char *)(newBuf + bufSize_) - nLeftOver_;
    memmove(s,
            (char *)newBuf + (leftOver_ - (char *)buf_),
            nLeftOver_);
    leftOver_ = s;
  }
  delete [] buf_;
  buf_ = newBuf;
}
// NOTE(review): presumably supplies the run-time type information declared
// by RTTI_CLASS in ExternalInfoImpl, with ExternalInfo as its single base;
// the macro comes from rtti.h -- confirm there.
RTTI_DEF1(ExternalInfoImpl, ExternalInfo)
// Takes over the contents of parsedSysid (by swapping) and creates one
// position record per storage object.  notrack_ starts out as the notrack
// setting of the first storage object.  It is initialized to 0 first so
// that it has a defined value when the system id has no storage objects.
ExternalInfoImpl::ExternalInfoImpl(ParsedSystemId &parsedSysid)
  : position_(parsedSysid.size()), currentIndex_(0), notrack_(0)
{
  parsedSysid.swap(parsedSysid_);
  if (parsedSysid_.size() > 0)
    notrack_ = parsedSysid_[0].notrack;
}
void ExternalInfoImpl::setId(size_t i, StringC &id)
{
Mutex::Lock lock(&mutex_);
id.swap(position_[i].id);
}
void ExternalInfoImpl::getId(size_t i, StringC &id) const
{
Mutex::Lock lock(&((ExternalInfoImpl *)this)->mutex_);
id = position_[i].id;
}
// Records the decoder used for storage object i, under the mutex.
void ExternalInfoImpl::setDecoder(size_t i, Decoder *decoder)
{
  Mutex::Lock lock(&mutex_);
  position_[i].decoder = decoder;
}
// Flags the current storage object as one whose RSs are inserted by the
// reader.  convertOffset() uses the flag to correct byte indexes.
void ExternalInfoImpl::noteInsertedRSs()
{
  position_[currentIndex_].insertedRSs = 1;
}
// Notes a record start at offset.  Unless tracking is off, the offset is
// appended to rsList_.  If the offset is the very first one of the current
// storage object, the object is flagged as starting with an RS.
void ExternalInfoImpl::noteRS(Offset offset)
{
  // We do the locking in OffsetOrderedList.
  if (!notrack_)
    rsList_.append(offset);
  // The current storage object starts where the previous one ended.
  const Offset objectStart
    = currentIndex_ == 0 ? 0 : position_[currentIndex_ - 1].endOffset;
  if (offset == objectStart)
    position_[currentIndex_].startsWithRS = 1;
}
// Records that the current storage object ends at offset and makes the next
// one current.  The new current object remembers how many RSs were noted
// before it, and notrack_ is refreshed from its spec.  Nothing is recorded
// for the last storage object.
void ExternalInfoImpl::noteStorageObjectEnd(Offset offset)
{
  Mutex::Lock lock(&mutex_);
  ASSERT(currentIndex_ < position_.size());
  // The last endOffset_ must be -1.
  if (!(currentIndex_ < position_.size() - 1))
    return;
  position_[currentIndex_].endOffset = offset;
  currentIndex_++;
  position_[currentIndex_].line1RS = rsList_.size();
  notrack_ = parsedSysid_[currentIndex_].notrack;
}
// Converts an offset in the concatenated input into a location within one
// storage object: which object, its actual id, the offset within it, and
// where it can be computed, the line number, column number and byte index.
// Fields that cannot be computed are set to (unsigned long)-1.  Returns
// false for Offset(-1), when there are no storage objects, or when no
// storage object at or before the offset has an id.
Boolean ExternalInfoImpl::convertOffset(Offset off,
StorageObjectLocation &ret) const
{
Mutex::Lock lock(&((ExternalInfoImpl *)this)->mutex_);
if (off == Offset(-1) || position_.size() == 0)
return false;
// the last endOffset_ is Offset(-1), so this will
// terminate
int i;
// Find the storage object that contains the offset.
for (i = 0; off >= position_[i].endOffset; i++)
;
// Fall back to the nearest earlier storage object that has an id.
for (; position_[i].id.size() == 0; i--)
if (i == 0)
return false;
ret.storageObjectSpec = &parsedSysid_[i];
ret.actualStorageId = position_[i].id;
Offset startOffset = i == 0 ? 0 : position_[i - 1].endOffset;
ret.storageObjectOffset = off - startOffset;
ret.byteIndex = ret.storageObjectOffset;
if (parsedSysid_[i].notrack
|| parsedSysid_[i].records == StorageObjectSpec::asis) {
// No line information is available for this storage object.
ret.lineNumber = (unsigned long)-1;
if (parsedSysid_[i].records != StorageObjectSpec::asis) {
// With inserted RSs and no tracking, the byte index cannot be
// recovered.
if (position_[i].insertedRSs)
ret.byteIndex = (unsigned long)-1;
else if (ret.byteIndex > 0 && position_[i].startsWithRS)
ret.byteIndex--; // first RS is inserted
}
ret.columnNumber = (unsigned long)-1;
return true;
}
else {
size_t line1RS = position_[i].line1RS;
// line1RS is now the number of RSs that are before or on the current line.
size_t j;
Offset colStart;
if (rsList_.findPreceding(off, j, colStart)) {
// Subtract the RSs the reader inserted in this storage object before
// the offset; they have no counterpart in the stored bytes.
if (position_[i].insertedRSs)
ret.byteIndex -= j + 1 - line1RS;
else if (ret.byteIndex > 0 && position_[i].startsWithRS)
ret.byteIndex--; // first RS is inserted
j++;
colStart++;
}
else {
j = 0;
colStart = 0;
}
// j is now the number of RSs that are before or on the current line
// colStart is the offset of the first column
ret.lineNumber = j - line1RS + 1 - position_[i].startsWithRS;
// the offset of the first column
if (colStart < startOffset)
colStart = startOffset;
// the RS that starts a line will be in column 0;
// the first real character of a line will be column 1
ret.columnNumber = 1 + off - colStart;
}
// Let the decoder turn the character index into a byte index, if it can.
if (!position_[i].decoder
|| !position_[i].decoder->convertOffset(ret.byteIndex))
ret.byteIndex = (unsigned long)-1;
return true;
}
// Returns the specification of storage object i.
const StorageObjectSpec &ExternalInfoImpl::spec(size_t i) const
{
  const StorageObjectSpec &entry = parsedSysid_[i];
  return entry;
}
// Returns the number of storage object specifications.
size_t ExternalInfoImpl::nSpecs() const
{
  const size_t count = parsedSysid_.size();
  return count;
}
// Gives read-only access to the stored parsed system identifier.
const ParsedSystemId &ExternalInfoImpl::parsedSystemId() const
{
  return parsedSysid_;
}
// Defaults: no storage manager or coding system, tracking enabled, record
// type to be detected (find), EOF zapping on, searching on.
StorageObjectSpec::StorageObjectSpec()
  : storageManager(0),
    codingSystem(0),
    codingSystemName(0),
    notrack(0),
    records(find),
    zapEof(1),
    search(1)
{
}
// A fresh position has an "unbounded" end offset (Offset(-1)) and all
// record-start bookkeeping cleared.
StorageObjectPosition::StorageObjectPosition()
  : endOffset(Offset(-1)),
    line1RS(0),
    startsWithRS(0),
    insertedRSs(0)
{
}
// Prepares to parse str from its first character.  When a defining
// location is given, its storage object spec and actual id are remembered
// as defaults; otherwise both are 0.  The initializers are listed in member
// declaration order, which is the order they run in anyway.
FSIParser::FSIParser(const StringC &str,
                     const CharsetInfo &idCharset,
                     Boolean isNdata,
                     const StorageObjectLocation *defLoc,
                     const EntityManagerImpl *em,
                     Messenger &mgr)
  : str_(str),
    strIndex_(0),
    mgr_(mgr),
    em_(em),
    defSpec_(defLoc ? defLoc->storageObjectSpec : 0),
    defId_(defLoc ? &defLoc->actualStorageId : 0),
    idCharset_(idCharset),
    isNdata_(isNdata)
{
}
// Returns the next character and advances, or -1 at the end of the string.
Xchar FSIParser::get()
{
  if (strIndex_ >= str_.size())
    return -1;
  return str_[strIndex_++];
}
// Steps the read position back by one; does nothing at the start.
void FSIParser::unget()
{
  if (strIndex_ == 0)
    return;
  strIndex_ -= 1;
}
// Case-insensitive comparison of str against the C string s.
//
// Each character of s is converted to upper and to lower case, mapped into
// idCharset_ with execToDesc(), and compared with the corresponding
// character of str; either form may match.  Returns false when the lengths
// differ or when any character matches neither case form.
//
// The character is passed to toupper()/tolower() as an unsigned char:
// calling them with a negative plain char value is undefined behavior.
Boolean FSIParser::matchKey(const StringC &str, const char *s)
{
  if (strlen(s) != str.size())
    return false;
  for (size_t i = 0; i < str.size(); i++) {
    const unsigned char ch = (unsigned char)s[i];
    if (idCharset_.execToDesc(char(toupper(ch))) != str[i]
        && idCharset_.execToDesc(char(tolower(ch))) != str[i])
      return false;
  }
  return true;
}
// True if ch is the representation of execC in idCharset_.
Boolean FSIParser::matchChar(Xchar ch, char execC)
{
  const Boolean same = (ch == idCharset_.execToDesc(execC));
  return same;
}
// True if c is a separator character: space, carriage return, line feed or
// horizontal tab.  The tab is written as an escape so that it cannot be
// mistaken for, or silently turned into, a second test for the space.
Boolean FSIParser::isS(Xchar c)
{
  return (matchChar(c, ' ')
          || matchChar(c, '\r')
          || matchChar(c, '\n')
          || matchChar(c, '\t'));
}
// If c is a decimal digit in idCharset_, stores its value in weight and
// returns 1.  Otherwise returns 0 and leaves weight untouched.
Boolean FSIParser::convertDigit(Xchar c, int &weight)
{
  static const char digits[] = "0123456789";
  int value = 0;
  while (digits[value] != '\0') {
    if (matchChar(c, digits[value])) {
      weight = value;
      return 1;
    }
    value++;
  }
  return 0;
}
// Parse the system identifier in str_ from the current position and append
// the resulting storage object specifications (and catalog maps) to
// parsedSysid.
//
// If the text does not start with '<' followed by a recognized storage
// manager keyword (or CATALOG / NEUTRAL), the remainder from the starting
// position is handed to handleInformal().  A CATALOG tag is processed by
// setCatalogAttributes() and parsing then restarts after it.
//
// Returns 1 on success, 0 if an attribute list or an identifier could not
// be processed.
Boolean FSIParser::parse(ParsedSystemId &parsedSysid)
{
  size_t startIndex = strIndex_;
  if (!matchChar(get(), '<'))
    return handleInformal(startIndex, parsedSysid);
  // Collect the keyword that follows '<'.
  StringC key;
  for (;;) {
    Xchar c = get();
    if (c == -1)
      return handleInformal(startIndex, parsedSysid);
    if (isS(c) || matchChar(c, '>'))
      break;
    key += Char(c);
  }
  unget();
  if (matchKey(key, "CATALOG")) {
    if (!setCatalogAttributes(parsedSysid))
      return 0;
    return parse(parsedSysid);
  }
  Boolean neutral;
  StorageManager *sm = lookupStorageType(key, neutral);
  if (!sm)
    return handleInformal(startIndex, parsedSysid);
  // One iteration per storage manager tag and the identifier following it.
  for (;;) {
    parsedSysid.resize(parsedSysid.size() + 1);
    StorageObjectSpec &sos = parsedSysid.back();
    sos.storageManager = sm;
    Xchar smcrd;
    Boolean fold;
    if (!setAttributes(sos, neutral, smcrd, fold))
      return 0;
    // Remember whether THIS spec came from a NEUTRAL tag.  The scan below
    // calls lookupStorageType() for the next tag, which overwrites
    // `neutral`; without this snapshot the transformNeutral() decision for
    // the current identifier would be made with the next tag's setting.
    const Boolean sosNeutral = neutral;
    sm = 0;
    StringC id;
    Boolean hadData = 0;
    for (;;) {
      Xchar c = get();
      if (c == -1)
        break;
      if (matchChar(c, '<')) {
        hadData = 1;
        Char stago = c;
        key.resize(0);
        for (;;) {
          c = get();
          if (c == -1) {
            // Unterminated "<...": keep it as literal identifier text.
            id += stago;
            id += key;
            break;
          }
          if (isS(c) || matchChar(c, '>')) {
            unget();
            sm = lookupStorageType(key, neutral);
            if (!sm) {
              // Not a storage manager keyword: literal identifier text.
              id += stago;
              id += key;
            }
            break;
          }
          key += c;
        }
        if (sm)
          break;
      }
      else if (!((!hadData && matchChar(c, '\r'))       // ignored RE
                 || matchChar(c, '\n') )) {             // ignored RS
        hadData = 1;
        id += c;
      }
    }
    // Drop a single trailing RE.
    if (id.size() > 0 && matchChar(id[id.size() - 1], '\r'))
      id.resize(id.size() - 1);
    uncharref(id);
    id.swap(sos.specId);
    if (!convertId(sos.specId, smcrd, sos.storageManager))
      return 0;
    if (sosNeutral) {
      if (!sos.storageManager->transformNeutral(sos.specId, fold, mgr_))
        return 0;
    }
    if (sos.storageManager->resolveRelative(sos.baseId, sos.specId,
                                            sos.search))
      sos.baseId.resize(0);
    if (!sm)
      break;
  }
  return 1;
}
// Treat the text of str_ from index onwards as a single identifier with no
// storage manager tag.  A new spec is appended to parsedSysid; its storage
// manager is guessed from the identifier, falling back to the default
// spec's manager (when that is inheritable) or to the entity manager's
// default.  Returns 0 if the identifier cannot be converted, else 1.
Boolean FSIParser::handleInformal(size_t index, ParsedSystemId &parsedSysid)
{
  parsedSysid.resize(parsedSysid.size() + 1);
  StorageObjectSpec &spec = parsedSysid.back();
  spec.specId.assign(str_.data() + index, str_.size() - index);

  spec.storageManager = em_->guessStorageType(spec.specId, idCharset_);
  if (!spec.storageManager) {
    const Boolean inherit
      = defSpec_ && defSpec_->storageManager->inheritable();
    if (inherit)
      spec.storageManager = defSpec_->storageManager;
    else
      spec.storageManager = em_->defaultStorageManager_.pointer();
  }

  setDefaults(spec);
  if (!convertId(spec.specId, -1, spec.storageManager))
    return 0;
  // A true result from resolveRelative() means the base id is no longer kept.
  const Boolean dropBase
    = spec.storageManager->resolveRelative(spec.baseId, spec.specId,
                                           spec.search);
  if (dropBase)
    spec.baseId.resize(0);
  return 1;
}
// Map a tag keyword to a storage manager.
// For the keyword NEUTRAL, neutral is set to 1 and the default spec's
// manager (if inheritable) or the entity manager's default is returned.
// Otherwise the entity manager is asked; on success neutral is set to 0.
// When nothing is found, 0 is returned and neutral is left unchanged.
StorageManager *FSIParser::lookupStorageType(const StringC &key,
                                             Boolean &neutral)
{
  if (!matchKey(key, "NEUTRAL")) {
    StorageManager *found = em_->lookupStorageType(key, idCharset_);
    if (found)
      neutral = 0;
    return found;
  }
  neutral = 1;
  if (defSpec_ && defSpec_->storageManager->inheritable())
    return defSpec_->storageManager;
  return em_->defaultStorageManager_.pointer();
}
// Process the attributes of a CATALOG tag.  A new map entry is appended to
// parsedSysid.maps, initially of type catalogDocument; a PUBLIC attribute
// with a value turns it into catalogPublic and stores the converted
// public identifier.  Any other attribute is reported as unsupported.
// Returns 0 only when parseAttribute() fails.
Boolean FSIParser::setCatalogAttributes(ParsedSystemId &parsedSysid)
{
  Boolean sawPublic = 0;
  parsedSysid.maps.resize(parsedSysid.maps.size() + 1);
  parsedSysid.maps.back().type = ParsedSystemId::Map::catalogDocument;
  for (;;) {
    StringC name, text;
    Boolean hasText;
    if (!parseAttribute(name, hasText, text)) {
      mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_));
      return 0;
    }
    // An empty name marks the end of the tag.
    if (name.size() == 0)
      break;
    if (!matchKey(name, "PUBLIC")) {
      mgr_.message(hasText
                   ? EntityManagerMessages::fsiUnsupportedAttribute
                   : EntityManagerMessages::fsiUnsupportedAttributeToken,
                   StringMessageArg(name));
      continue;
    }
    if (sawPublic) {
      mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
                   StringMessageArg(idCharset_.execToDesc("PUBLIC")));
    }
    else if (!hasText) {
      mgr_.message(EntityManagerMessages::fsiMissingValue,
                   StringMessageArg(name));
    }
    else {
      convertMinimumLiteral(text, parsedSysid.maps.back().publicId);
      parsedSysid.maps.back().type = ParsedSystemId::Map::catalogPublic;
    }
    sawPublic = 1;
  }
  return 1;
}
void FSIParser::convertMinimumLiteral(const StringC &from, StringC &to)
{
// Do just enough to ensure it can be reparsed.
to.resize(0);
for (size_t i = 0; i < from.size(); i++) {
Char c = from[i];
if (matchChar(c, '"') || matchChar(c, '#'))
mgr_.message(EntityManagerMessages::fsiLookupChar, NumberMessageArg(c));
else if (matchChar(c, ' ')) {
if (to.size() && to[to.size() - 1] != c)
to += c;
}
else
to += c;
}
if (to.size() && matchChar(to[to.size() - 1], ' '))
to.resize(to.size() - 1);
}
// FIXME This should be table driven.
// Parse the attribute specifications of one storage manager tag and record
// them in sos.  Attributes are read with parseAttribute() until it yields
// an empty token.
//   sos     - spec being filled in; sos.storageManager is queried here and
//             by setDefaults(), so it must already be set by the caller.
//   neutral - when false, FOLD/NOFOLD are rejected (fsiFoldNotNeutral);
//             when true, a non-empty SOIBASE is passed to transformNeutral().
//   smcrd   - out: the SMCRD character, or -1 when none was given.
//   fold    - out: fold flag; starts at 1 and is cleared by FOLD=NOFOLD
//             or NOFOLD.
// Returns 0 only when parseAttribute() fails (fsiSyntax is reported).
// Every other problem is reported through mgr_ and parsing continues.
Boolean FSIParser::setAttributes(StorageObjectSpec &sos,
Boolean neutral,
Xchar &smcrd,
Boolean &fold)
{
// The had* flags record that an attribute (in either its NAME=value or its
// bare-token form) has been seen, so that repeats can be diagnosed.
Boolean hadBctf = 0;
Boolean hadEncoding = 0;
Boolean hadTracking = 0;
Boolean hadSmcrd = 0;
smcrd = -1;
fold = 1;
Boolean hadRecords = 0;
Boolean hadBase = 0;
Boolean hadZapeof = 0;
Boolean hadSearch = 0;
Boolean hadFold = 0;
// Scratch result for the bare record-type token branch below; it is only
// read after lookupRecords() has stored a value in it.
StorageObjectSpec::Records records;
setDefaults(sos);
for (;;) {
StringC token, value;
Boolean gotValue;
if (!parseAttribute(token, gotValue, value)) {
mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_));
return 0;
}
// An empty token marks the end of the tag.
if (token.size() == 0)
break;
// BCTF=name: not allowed when the storage manager requires a coding
// system, and mutually exclusive with ENCODING.
if (matchKey(token, "BCTF")) {
if (sos.storageManager->requiredCodingSystem())
mgr_.message(EntityManagerMessages::fsiBctfEncodingNotApplicable);
else if (hadBctf)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(token));
else if (hadEncoding)
mgr_.message(EntityManagerMessages::fsiBctfAndEncoding);
else if (gotValue) {
const char *codingSystemName;
const InputCodingSystem *codingSystem
= em_->lookupCodingSystem(value, idCharset_, 1, codingSystemName);
if (codingSystem) {
sos.codingSystem = codingSystem;
sos.codingSystemName = codingSystemName;
sos.codingSystemType = StorageObjectSpec::bctf;
}
// "SAME": take the coding system of the default spec, or the entity
// manager's default when there is none.  Nothing is changed for NDATA.
else if (matchKey(value, "SAME")) {
if (!isNdata_) {
if (defSpec_) {
sos.codingSystem = defSpec_->codingSystem;
sos.codingSystemName = defSpec_->codingSystemName;
sos.codingSystemType = defSpec_->codingSystemType;
}
else {
sos.codingSystem = em_->defaultCodingSystem_;
sos.codingSystemName = 0;
sos.codingSystemType = (em_->internalCharsetIsDocCharset_
? StorageObjectSpec::bctf
: StorageObjectSpec::encoding);
}
}
}
else
mgr_.message(EntityManagerMessages::fsiUnknownBctf,
StringMessageArg(value));
}
else
mgr_.message(EntityManagerMessages::fsiMissingValue,
StringMessageArg(token));
hadBctf = 1;
}
// ENCODING=name: same handling as BCTF except that lookupCodingSystem()
// is called with 0 as its third argument and the type becomes "encoding".
else if (matchKey(token, "ENCODING")) {
if (sos.storageManager->requiredCodingSystem())
mgr_.message(EntityManagerMessages::fsiBctfEncodingNotApplicable);
else if (hadEncoding)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(token));
else if (hadBctf)
mgr_.message(EntityManagerMessages::fsiBctfAndEncoding);
else if (gotValue) {
const char *codingSystemName;
const InputCodingSystem *codingSystem
= em_->lookupCodingSystem(value, idCharset_, 0, codingSystemName);
if (codingSystem) {
sos.codingSystem = codingSystem;
sos.codingSystemName = codingSystemName;
sos.codingSystemType = StorageObjectSpec::encoding;
}
else if (matchKey(value, "SAME")) {
if (!isNdata_) {
if (defSpec_) {
sos.codingSystem = defSpec_->codingSystem;
sos.codingSystemName = defSpec_->codingSystemName;
sos.codingSystemType = defSpec_->codingSystemType;
}
else {
sos.codingSystem = em_->defaultCodingSystem_;
sos.codingSystemName = 0;
sos.codingSystemType = (em_->internalCharsetIsDocCharset_
? StorageObjectSpec::bctf
: StorageObjectSpec::encoding);
}
}
}
else
mgr_.message(EntityManagerMessages::fsiUnknownEncoding,
StringMessageArg(value));
}
else
mgr_.message(EntityManagerMessages::fsiMissingValue,
StringMessageArg(token));
hadEncoding = 1;
}
// TRACKING=TRACK|NOTRACK
else if (matchKey(token, "TRACKING")) {
if (hadTracking)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(token));
else if (gotValue) {
if (matchKey(value, "NOTRACK"))
sos.notrack = 1;
else if (!matchKey(value, "TRACK"))
mgr_.message(EntityManagerMessages::fsiBadTracking,
StringMessageArg(value));
}
else
mgr_.message(EntityManagerMessages::fsiMissingValue,
StringMessageArg(token));
hadTracking = 1;
}
// ZAPEOF=ZAPEOF|NOZAPEOF, or the bare token ZAPEOF.
else if (matchKey(token, "ZAPEOF")) {
if (sos.storageManager->requiredCodingSystem())
mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable);
else if (hadZapeof)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(token));
else if (gotValue) {
if (matchKey(value, "ZAPEOF"))
sos.zapEof = 1;
else if (matchKey(value, "NOZAPEOF"))
sos.zapEof = 0;
else
mgr_.message(EntityManagerMessages::fsiBadZapeof,
StringMessageArg(value));
}
else
sos.zapEof = 1;
hadZapeof = 1;
}
// Bare token NOZAPEOF; giving it a value is an error (fsiValueAsName).
else if (matchKey(token, "NOZAPEOF")) {
if (sos.storageManager->requiredCodingSystem())
mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable);
else if (hadZapeof)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(idCharset_.execToDesc("ZAPEOF")));
else if (gotValue)
mgr_.message(EntityManagerMessages::fsiValueAsName,
StringMessageArg(token));
else
sos.zapEof = 0;
hadZapeof = 1;
}
// SEARCH=SEARCH|NOSEARCH, or the bare token SEARCH.
else if (matchKey(token, "SEARCH")) {
if (hadSearch)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(token));
else if (gotValue) {
if (matchKey(value, "SEARCH"))
sos.search = 1;
else if (matchKey(value, "NOSEARCH"))
sos.search = 0;
else
mgr_.message(EntityManagerMessages::fsiBadSearch,
StringMessageArg(value));
}
else
sos.search = 1;
hadSearch = 1;
}
// Bare token NOSEARCH.
else if (matchKey(token, "NOSEARCH")) {
if (hadSearch)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(idCharset_.execToDesc("SEARCH")));
else if (gotValue)
mgr_.message(EntityManagerMessages::fsiValueAsName,
StringMessageArg(token));
else
sos.search = 0;
hadSearch = 1;
}
// FOLD=FOLD|NOFOLD, or the bare token FOLD; only accepted when neutral.
else if (matchKey(token, "FOLD")) {
if (!neutral)
mgr_.message(EntityManagerMessages::fsiFoldNotNeutral);
else if (hadFold)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(token));
else if (gotValue) {
if (matchKey(value, "FOLD"))
fold = 1;
else if (matchKey(value, "NOFOLD"))
fold = 0;
else
mgr_.message(EntityManagerMessages::fsiBadFold,
StringMessageArg(value));
}
else
fold = 1;
hadFold = 1;
}
// Bare token NOFOLD; only accepted when neutral.
else if (matchKey(token, "NOFOLD")) {
if (!neutral)
mgr_.message(EntityManagerMessages::fsiFoldNotNeutral);
else if (hadFold)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(idCharset_.execToDesc("FOLD")));
else if (gotValue)
mgr_.message(EntityManagerMessages::fsiValueAsName,
StringMessageArg(token));
else
fold = 0;
hadFold = 1;
}
// SMCRD=c: an empty value disables it (-1); otherwise the value must be
// exactly one character.
else if (matchKey(token, "SMCRD")) {
if (hadSmcrd)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(token));
else if (gotValue) {
if (value.size() == 0)
smcrd = -1;
else if (value.size() == 1)
smcrd = value[0];
else
mgr_.message(EntityManagerMessages::fsiBadSmcrd,
StringMessageArg(value));
}
else
mgr_.message(EntityManagerMessages::fsiMissingValue,
StringMessageArg(token));
hadSmcrd = 1;
}
// RECORDS=type: not applicable when the storage manager requires CR.
else if (matchKey(token, "RECORDS")) {
if (sos.storageManager->requiresCr())
mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable);
else if (hadRecords)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(token));
else if (gotValue) {
if (!lookupRecords(value, sos.records))
mgr_.message(EntityManagerMessages::fsiUnsupportedRecords,
StringMessageArg(value));
}
else
mgr_.message(EntityManagerMessages::fsiMissingValue,
StringMessageArg(token));
hadRecords = 1;
}
// SOIBASE=id: the value becomes the base id; it is converted after the loop.
else if (matchKey(token, "SOIBASE")) {
if (hadBase)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(token));
else if (gotValue)
value.swap(sos.baseId);
else {
mgr_.message(EntityManagerMessages::fsiMissingValue,
StringMessageArg(token));
sos.baseId.resize(0);
}
hadBase = 1;
}
// Bare record-type token (one of the names in recordTypeTable).
else if (lookupRecords(token, records)) {
if (sos.storageManager->requiresCr())
mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable);
else if (hadRecords)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(idCharset_.execToDesc("RECORDS")));
else if (!gotValue)
sos.records = records;
else
mgr_.message(EntityManagerMessages::fsiValueAsName,
StringMessageArg(token));
hadRecords = 1;
}
// Bare token NOTRACK.
else if (matchKey(token, "NOTRACK")) {
if (hadTracking)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(idCharset_.execToDesc("TRACKING")));
else if (!gotValue)
sos.notrack = 1;
else
mgr_.message(EntityManagerMessages::fsiValueAsName,
StringMessageArg(token));
hadTracking = 1;
}
// Bare token TRACK: nothing to store, only duplicate/value checks.
else if (matchKey(token, "TRACK")) {
if (hadTracking)
mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
StringMessageArg(idCharset_.execToDesc("TRACKING")));
else if (gotValue)
mgr_.message(EntityManagerMessages::fsiValueAsName,
StringMessageArg(token));
hadTracking = 1;
}
// Anything else is reported as unsupported.
else
mgr_.message(gotValue
? EntityManagerMessages::fsiUnsupportedAttribute
: EntityManagerMessages::fsiUnsupportedAttributeToken,
StringMessageArg(token));
}
// Convert a supplied base id the same way parse() converts the identifier.
// NOTE(review): the result of convertId() is ignored here, so a failed
// conversion leaves baseId as written -- confirm this is intended.
if (hadBase && sos.baseId.size() > 0) {
convertId(sos.baseId, smcrd, sos.storageManager);
if (neutral) {
if (!sos.storageManager->transformNeutral(sos.baseId, fold, mgr_))
sos.baseId.resize(0);
}
}
// With ASIS records, EOF zapping is switched off unless a
// ZAPEOF/NOZAPEOF attribute was seen.
if (!hadZapeof && hadRecords && sos.records == StorageObjectSpec::asis)
sos.zapEof = 0;
return 1;
}
// Table pairing each record-type keyword with its StorageObjectSpec value.
// It is scanned linearly by recordsName() and lookupRecords().
FSIParser::RecordType FSIParser::recordTypeTable[] = {
{ "FIND", StorageObjectSpec::find },
{ "ASIS", StorageObjectSpec::asis },
{ "CR", StorageObjectSpec::cr },
{ "LF", StorageObjectSpec::lf },
{ "CRLF", StorageObjectSpec::crlf }
};
// Return the keyword for a record type, or 0 if the value does not appear
// in recordTypeTable.
const char *FSIParser::recordsName(StorageObjectSpec::Records records)
{
  const size_t count = SIZEOF(recordTypeTable);
  size_t idx = 0;
  while (idx < count && !(records == recordTypeTable[idx].value))
    idx++;
  if (idx < count)
    return recordTypeTable[idx].name;
  return 0;
}
// Look token up (case-insensitively, via matchKey) among the record-type
// keywords.  On a match store the value in result and return 1; otherwise
// return 0 and leave result unchanged.
Boolean FSIParser::lookupRecords(const StringC &token,
                                 StorageObjectSpec::Records &result)
{
  const size_t count = SIZEOF(recordTypeTable);
  for (size_t idx = 0; idx != count; ++idx) {
    if (!matchKey(token, recordTypeTable[idx].name))
      continue;
    result = recordTypeTable[idx].value;
    return 1;
  }
  return 0;
}
// Fill in the default record type, zapEof flag, base id and coding system
// of sos.  sos.storageManager must already be set.  Values are taken from
// the storage manager's requirements, the NDATA flag, the default spec
// (defSpec_ / defId_) and finally the entity manager's defaults.
void FSIParser::setDefaults(StorageObjectSpec &sos)
{
  // Record type.
  if (sos.storageManager->requiresCr())
    sos.records = StorageObjectSpec::cr;
  else {
    const Boolean wantAsis
      = isNdata_ || (defSpec_ && defSpec_->records == StorageObjectSpec::asis);
    if (wantAsis)
      sos.records = StorageObjectSpec::asis;
  }

  // EOF zapping.
  if (isNdata_ || (defSpec_ && !defSpec_->zapEof))
    sos.zapEof = 0;

  // Base id: only inherited when the default spec uses the same manager.
  if (defSpec_ && defSpec_->storageManager == sos.storageManager) {
    if (!defId_) {
      sos.baseId = defSpec_->specId;
      sos.storageManager->resolveRelative(defSpec_->baseId,
                                          sos.baseId,
                                          0);
    }
    else
      sos.baseId = *defId_;
  }

  // Coding system: a required one wins, then NDATA, then the default
  // spec, then the entity manager's default.
  sos.codingSystem = sos.storageManager->requiredCodingSystem();
  if (sos.codingSystem) {
    sos.zapEof = 0;             // hack
    sos.codingSystemType = StorageObjectSpec::special;
    return;
  }
  if (isNdata_) {
    sos.codingSystem = em_->codingSystemKit_->identityInputCodingSystem();
    sos.codingSystemType = StorageObjectSpec::special;
    return;
  }
  if (defSpec_) {
    sos.codingSystem = defSpec_->codingSystem;
    sos.codingSystemName = defSpec_->codingSystemName;
    sos.codingSystemType = defSpec_->codingSystemType;
    return;
  }
  sos.codingSystem = em_->defaultCodingSystem_;
  if (em_->internalCharsetIsDocCharset_)
    sos.codingSystemType = StorageObjectSpec::bctf;
  else
    sos.codingSystemType = StorageObjectSpec::encoding;
}
// Read one attribute specification from the current position.
//   token    - out: attribute name; left empty when '>' (end of tag) is read
//   gotValue - out: 1 if "= value" followed the name, 0 if not (it is not
//              assigned on the end-of-tag or failure paths)
//   value    - out: attribute value when gotValue is 1
// Returns 0 on a syntax error or premature end of input, 1 otherwise.
// When the name is not followed by '=', the character after it is pushed
// back for the next call.
Boolean FSIParser::parseAttribute(StringC &token, Boolean &gotValue,
                                  StringC &value)
{
  Xchar ch;
  // Skip separators before the name.
  do {
    ch = get();
  } while (isS(ch));
  if (ch == -1)
    return 0;
  token.resize(0);
  if (matchChar(ch, '>'))
    return 1;
  if (matchChar(ch, '"') || matchChar(ch, '\'') || matchChar(ch, '='))
    return 0;

  // Accumulate the name up to a separator, '>' or '='.
  do {
    token += ch;
    ch = get();
    if (ch == -1)
      return 0;
  } while (!isS(ch) && !matchChar(ch, '>') && !matchChar(ch, '='));
  while (isS(ch))
    ch = get();
  if (ch == -1)
    return 0;

  // Name without a value.
  if (!matchChar(ch, '=')) {
    unget();
    gotValue = 0;
    return 1;
  }

  gotValue = 1;
  value.resize(0);
  do {
    ch = get();
  } while (isS(ch));
  if (matchChar(ch, '>') || matchChar(ch, '='))
    return 0;

  if (!(matchChar(ch, '"') || matchChar(ch, '\''))) {
    // Unquoted value: runs to a separator (consumed) or to '>' / '='
    // (pushed back).
    for (;;) {
      value += ch;
      ch = get();
      if (ch == -1)
        return 0;
      if (isS(ch))
        break;
      if (matchChar(ch, '>') || matchChar(ch, '=')) {
        unget();
        break;
      }
    }
    return 1;
  }

  // Quoted literal: LF is dropped, CR and TAB become a space, and
  // numeric character references are expanded afterwards.
  Char delim = ch;
  for (;;) {
    Xchar lc = get();
    if (lc == delim)
      break;
    if (lc == -1)
      return 0;
    if (matchChar(lc, '\n'))
      continue;
    if (matchChar(lc, '\r') || matchChar(lc, '\t'))
      value += idCharset_.execToDesc(' ');
    else
      value += lc;
  }
  uncharref(value);
  return 1;
}
// Replace, in place, every numeric character reference of the form
// "&#digits" (with an optional terminating ';') by the character having
// that decimal number.  All other text is kept unchanged.
void FSIParser::uncharref(StringC &str)
{
  size_t out = 0;               // next position to write
  size_t in = 0;                // next position to read
  while (in < str.size()) {
    int d;
    if (!(matchChar(str[in], '&')
          && in + 2 < str.size()
          && matchChar(str[in + 1], '#')
          && convertDigit(str[in + 2], d))) {
      // Ordinary character: copy it down.
      str[out] = str[in];
      out++;
      in++;
      continue;
    }
    unsigned long code = d;
    for (in += 3; in < str.size() && convertDigit(str[in], d); in++)
      code = code*10 + d;
    str[out] = code;
    out++;
    if (in < str.size() && matchChar(str[in], ';'))
      in++;
  }
  str.resize(out);
}
// Convert id, in place, from the identifier character set to the form
// used by storage manager sm.
//   - A sequence "<smcrd>digits" (optionally followed by ';') becomes the
//     character with that decimal number, without further translation.
//   - If sm supplies an id charset, every other character is translated
//     through its universal code: RS is dropped, RE is replaced by the
//     manager's RE string when it has one.
//   - If sm supplies no id charset, other characters are copied as is.
// Returns 0 (leaving id unchanged) when a character cannot be translated.
Boolean FSIParser::convertId(StringC &id, Xchar smcrd,
                             const StorageManager *sm)
{
  const CharsetInfo *smCharset = sm->idCharset();
  StringC converted;
  size_t pos = 0;
  while (pos < id.size()) {
    int d;
    if (Xchar(id[pos]) == smcrd
        && pos + 1 < id.size()
        && convertDigit(id[pos + 1], d)) {
      Char code = d;
      for (pos += 2; pos < id.size() && convertDigit(id[pos], d); pos++)
        code = code*10 + d;
      converted += code;
      if (pos < id.size() && matchChar(id[pos], ';'))
        pos++;
      continue;
    }
    if (!smCharset) {
      converted += id[pos++];
      continue;
    }
    UnivChar univ;
    if (!idCharset_.descToUniv(id[pos++], univ))
      return 0;
    if (univ == UnivCharsetDesc::rs)
      continue;
    if (univ == UnivCharsetDesc::re && sm->reString()) {
      converted += *sm->reString();
      continue;
    }
    WideChar wide;
    ISet<WideChar> wideSet;
    if (smCharset->univToDesc(univ, wide, wideSet) != 1
        || wide > charMax)
      return 0;                 // FIXME give error
    converted += Char(wide);
  }
  converted.swap(id);
  return 1;
}
// Default constructor; no explicit member initialization is done here.
ParsedSystemId::ParsedSystemId()
{
}
// Forward declaration of the file-local helper used by
// ParsedSystemId::unparse().  It appends an escaped rendering of soi to
// result, and sets needSmcrd when an SMCRD-style escape was written.
static
void unparseSoi(const StringC &soi,
const CharsetInfo *idCharset,
const CharsetInfo &resultCharset,
StringC &result,
Boolean &needSmcrd);
void ParsedSystemId::unparse(const CharsetInfo &resultCharset,
Boolean isNdata,
StringC &result) const
{
size_t len = size();
result.resize(0);
size_t i;
for (i = 0; i < maps.size(); i++) {
if (maps[i].type == Map::catalogDocument)
result += resultCharset.execToDesc("<CATALOG>");
else if (maps[i].type == Map::catalogPublic) {
result += resultCharset.execToDesc("<CATALOG PUBLIC=\"");
result += maps[i].publicId;
result += resultCharset.execToDesc("\">");
}
}
for (i = 0; i < len; i++) {
const StorageObjectSpec &sos = (*this)[i];
result += resultCharset.execToDesc('<');
result += resultCharset.execToDesc(sos.storageManager->type());
if (sos.notrack)
result += resultCharset.execToDesc(" NOTRACK");
if (!sos.search)
result += resultCharset.execToDesc(" NOSEARCH");
if (!sos.storageManager->requiresCr()
&& sos.records != (isNdata ? StorageObjectSpec::asis : StorageObjectSpec::find)) {
result += resultCharset.execToDesc(' ');
result += resultCharset.execToDesc(FSIParser::recordsName(sos.records));
}
if (sos.codingSystemName && sos.codingSystemType != StorageObjectSpec::special) {
if (!sos.zapEof)
result += resultCharset.execToDesc(" NOZAPEOF");
result += resultCharset.execToDesc(sos.codingSystemType == StorageObjectSpec::bctf
? " BCTF="
: " ENCODING=");
result += resultCharset.execToDesc(sos.codingSystemName);
}
Boolean needSmcrd = 0;
if (sos.baseId.size() != 0) {
result += resultCharset.execToDesc(" SOIBASE='");
unparseSoi(sos.baseId,
sos.storageManager->idCharset(),
resultCharset,
result,
needSmcrd);
result += resultCharset.execToDesc('\'');
}
StringC tem;
unparseSoi(sos.specId,
sos.storageManager->idCharset(),
resultCharset,
tem,
needSmcrd);
if (needSmcrd)
result += resultCharset.execToDesc(" SMCRD='^'");
result += resultCharset.execToDesc('>');
result += tem;
}
}
void unparseSoi(const StringC &soi,
const CharsetInfo *idCharset,
const CharsetInfo &resultCharset,
StringC &result,
Boolean &needSmcrd)
{
if (!idCharset) {
for (size_t i = 0; i < soi.size(); i++) {
char buf[32];
sprintf(buf, "&#%lu;", (unsigned long)soi[i]);
result += resultCharset.execToDesc(buf);
}
return;
}
for (size_t i = 0; i < soi.size(); i++) {
UnivChar univ;
WideChar to;
ISet<WideChar> toSet;
if (!idCharset->descToUniv(soi[i], univ)
|| univ >= 127
|| univ < 32
|| univ == 36 // $
|| univ == 96 // `
#ifndef MSDOS_FILENAMES
|| univ == 92 // backslash
#endif
|| univ == 94 // ^
|| resultCharset.univToDesc(univ, to, toSet) != 1) {
needSmcrd = 1;
char buf[32];
sprintf(buf, "^%lu;", (unsigned long)soi[i]);
result += resultCharset.execToDesc(buf);
}
else {
switch (univ) {
case 34: // double quote
case 35: // #
case 39: // apostrophe
case 60: // <
{
char buf[32];
sprintf(buf, "&#%lu;", (unsigned long)to);
result += resultCharset.execToDesc(buf);
}
break;
default:
result += Char(to);
break;
}
}
}
}
#ifdef SP_NAMESPACE
}
#endif