/**
@file uemf_utf.c
@brief Functions for manipulating UTF and various types of text.
Compile with "U_VALGRIND" defined to enable code which lets valgrind check each record for
uninitialized data.
Compile with "SOL8" defined for Solaris 8 or 9 (Sparc).
*/
/*
File: uemf_utf.c
Version: 0.0.5
Date: 29-JAN-2014
Author: David Mathog, Biology Division, Caltech
email: mathog@caltech.edu
Copyright: 2014 David Mathog and California Institute of Technology (Caltech)
*/
#ifdef __cplusplus
extern "C" {
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h> // for uint16_t, uint32_t
#include <string.h>
#include <iconv.h>
#include <wchar.h>
#include <errno.h>
#include <string.h>
#include <limits.h> // for INT_MAX, INT_MIN
#include <math.h> // for U_ROUND()
#include "uemf_utf.h"
//! \cond
/* Prototypes for functions used here and defined in uemf_endian.c, but which are not supposed
to be used in end user code. */
//! \endcond
/* ******************************************************************************************** */
/** \cond */
/* iconv() has a funny cast on some older systems, on most recent ones
it is just char **. ICONV_CAST supplies whichever cast the platform needs.

Selection order:
  1. If ICONV_CAST is already defined (for instance on the compile line) it is
     left untouched, so a hand-chosen cast always wins.
  2. libiconv 1.9 (_LIBICONV_VERSION == 0x0109) and Solaris 8/9 (SOL8) get
     (const char **).
  3. Everything else gets (char **).

If you build this on another funky system, add a test to step 2 or define
ICONV_CAST on the compile line.
*/
#if !defined(ICONV_CAST)
#  if defined(SOL8) || (defined(_LIBICONV_VERSION) && (_LIBICONV_VERSION == 0x0109))
#    define ICONV_CAST (const char **)
#  else
#    define ICONV_CAST (char **)
#  endif
#endif //ICONV_CAST
/** \endcond */
/* **********************************************************************************************
These functions are used for development and debugging and should not be included in production code.
*********************************************************************************************** */
/**
\brief Announce a UTF8 string on stdout. Not for use in production code.

Writes the banner "char show" when a string is supplied and
"char show <NULL>" when it is not.
\param src string to examine (may be NULL)
*/
void wchar8show(
      const char *src
   ){
   /* pick the banner first, then emit it with a single write */
   const char *banner = (src ? "char show\n" : "char show <NULL>\n");
   fputs(banner, stdout);
}
/**
\brief Dump a UTF16 string. Not for use in production code.

Writes the banner "uint16_t show" to stdout, or "uint16_t show <NULL>" when
no string is supplied.
\param src string to examine (may be NULL)
*/
void wchar16show(
      const uint16_t *src
   ){
   if(!src){
      printf("uint16_t show <NULL>\n");
   }
   else {
      printf("uint16_t show\n");
   }
}
/**
\brief Dump a UTF32 string. Not for use in production code.

Writes the banner "uint32_t show" to stdout, or "uint32_t show <NULL>" when
no string is supplied.
\param src string to examine (may be NULL)
*/
void wchar32show(
      const uint32_t *src
   ){
   if(!src){
      printf("uint32_t show <NULL>\n");
   }
   else {
      printf("uint32_t show\n");
   }
}
/**
\brief Dump a wchar_t string. Not for use in production code.

Writes the banner "wchar_t show" followed by one line per character holding
the character's index, its value in decimal, and its value in hexadecimal.
A NULL pointer produces only "wchar_t show <NULL>".
\param src string to examine (may be NULL)
*/
void wchartshow(
      const wchar_t *src
   ){
   if(!src){
      printf("wchar_t show <NULL>\n");
      return;
   }
   printf("wchar_t show\n");
   for(size_t srclen = 0; *src; srclen++, src++){
      /* wchar_t is not strictly an integer type of known width or signedness,
         so copy it into a fixed type that matches the format specifiers. */
      unsigned long val = (unsigned long) *src;
      printf("%zu %lu %lx\n", srclen, val, val);
   }
}
/* **********************************************************************************************
These functions are used for character type conversions, Image conversions, and other
utility operations
*********************************************************************************************** */
/**
\brief Find the number of (storage) characters in a 16 bit character string, not including terminator.

Counts 16 bit storage units up to the first zero unit; a surrogate pair
therefore counts as two.
\return number of 16 bit units before the terminator, 0 if src is NULL
\param src string to examine (may be NULL)

NOTE(review): the function name was missing from this definition and has been
restored following the wchar16* naming used by the neighbouring functions;
confirm it against the prototype in uemf_utf.h.
*/
size_t wchar16len(
      const uint16_t *src
   ){
   size_t srclen = 0;
   if(src){
      while(*src){
         srclen++;
         src++;
      }
   }
   return(srclen);
}
/**
\brief Find the number of (storage) characters in a 32 bit character string, not including terminator.

Counts 32 bit storage units up to the first zero unit.
\return number of 32 bit units before the terminator, 0 if src is NULL
\param src string to examine (may be NULL)

NOTE(review): the function name was missing from this definition and has been
restored following the wchar32* naming used elsewhere in this file; confirm it
against the prototype in uemf_utf.h.
*/
size_t wchar32len(
      const uint32_t *src
   ){
   size_t srclen = 0;
   if(src){
      while(*src){
         srclen++;
         src++;
      }
   }
   return(srclen);
}
/**
\brief Strncpy for wchar16 (UTF16).

Copies at most nchars 16 bit units from src to dst. Copying stops right after
a terminator has been copied. Unlike strncpy() the remainder of dst is NOT
padded, and if src holds nchars or more units before its terminator then dst
is left unterminated. Nothing is written when src is NULL or nchars is 0.
\param dst destination (already allocated, room for at least nchars units)
\param src source (may be NULL)
\param nchars number of characters to copy
*/
void wchar16strncpy(
      uint16_t       *dst,
      const uint16_t *src,
      size_t          nchars
   ){
   if(src){
      for(; nchars; nchars--, dst++, src++){
         *dst = *src;
         if(!*src)break;   /* the terminator has just been copied, done */
      }
   }
}
/**
\brief Fill the output string with N characters, if the input string is shorter than N, pad with nulls.

Exactly nchars 16 bit units of dst are written: first the units of src up to
(not including) its terminator, then zeros. If src holds nchars or more units
dst is left unterminated. Nothing is written when src is NULL.
\param dst destination (already allocated, room for at least nchars units)
\param src source (may be NULL)
\param nchars number of characters to copy
*/
void wchar16strncpypad(
      uint16_t       *dst,
      const uint16_t *src,
      size_t          nchars
   ){
   if(src){
      /* copy phase: stop at the terminator or when dst is full */
      for(; *src && nchars; nchars--, dst++, src++){
         *dst = *src;
      }
      /* pad phase: whatever room is left is zero filled */
      for(; nchars; nchars--, dst++){
         *dst = 0;
      }
   }
}
/* For the following conversion functions, remember that iconv() modifies ALL of its parameters,
so save a pointer to the destination buffer!!!!
It isn't clear that terminators are being
copied properly, so be sure allocated space is a bit larger and cleared.
*/
/**
\brief Convert a UTF32LE string to a UTF16LE string.
\returns pointer to new string or NULL if it fails
\param src UTF32LE string to convert
\param max number of characters to convert, if 0, until terminator
\param len number of characters in new string, NOT including terminator
\note NOTE(review): the definition below is incomplete as it stands in this file.
The return type, function name and parameter list are absent, nothing declares
dst, and the closing braces do not pair up with the opening ones. Restore the
missing lines from a known-good revision before building.
*/
){
/* only the skeleton of the dst != NULL path is present */
if(dst){
}
else {
}
/* len is tested before use, so it is presumably an optional output -- TODO confirm */
else if(len){
}
}
}
}
/**
\brief Convert a UTF16LE string to a UTF32LE string.
\return pointer to new string or NULL if it fails
\param src UTF16LE string to convert
\param max number of characters to convert, if 0, until terminator
\param len number of characters in new string, NOT including terminator
\note NOTE(review): the definition below is incomplete as it stands in this file.
The return type, function name and parameter list are absent, nothing declares
dst, and the closing braces do not pair up with the opening ones. Restore the
missing lines from a known-good revision before building.
*/
){
/* only the skeleton of the dst != NULL path is present */
if(dst){
}
else {
}
/* len is tested before use, so it is presumably an optional output -- TODO confirm */
else if(len){
}
}
}
}
/**
\brief Convert a Latin1 string to a UTF32LE string.
\return pointer to new string or NULL if it fails
\param src Latin1 string to convert
\param max number of characters to convert, if 0, until terminator
\param len number of characters in new string, NOT including terminator
U_EMR_EXTTEXTOUTA records are "8 bit ASCII". In theory that is ASCII in an 8
bit character, but numerous applications store Latin1 in them, and some
_may_ store UTF-8 in them. Since very few Latin1 strings are valid UTF-8 strings,
call U_Utf8ToUtf32le first, and if it fails, then call this function.
\note NOTE(review): the definition below is incomplete as it stands in this file.
The return type and function name are absent, src is the only parameter
present, nothing declares dst, and the closing braces do not pair up with the
opening ones. Restore the missing lines from a known-good revision before building.
*/
const char *src,
){
/* only the skeleton of the dst != NULL path is present */
if(dst){
}
else {
}
/* len is tested before use, so it is presumably an optional output -- TODO confirm */
else if(len){
}
}
}
}
/**
\brief Convert a UTF8 string to a UTF32LE string.
\return pointer to new string or NULL if it fails
\param src UTF8 string to convert
\param max number of characters to convert, if 0, until terminator
\param len number of characters in new string, NOT including terminator
\note NOTE(review): the definition below is incomplete as it stands in this file.
The return type and function name are absent, src is the only parameter
present, nothing declares dst, and the closing braces do not pair up with the
opening ones. Restore the missing lines from a known-good revision before building.
*/
const char *src,
){
/* only the skeleton of the dst != NULL path is present */
if(dst){
}
else {
}
/* len is tested before use, so it is presumably an optional output -- TODO confirm */
else if(len){
}
}
}
}
/**
\brief Convert a UTF32LE string to a UTF8 string.
\return pointer to new string or NULL if it fails
\param src UTF32LE string to convert
\param max number of characters to convert, if 0, until terminator
\param len number of characters in new string, NOT including terminator
\note NOTE(review): the definition below is incomplete as it stands in this file.
The parameter list is empty although src, max and len are documented, nothing
declares dst or dst2, and the closing braces do not pair up with the opening
ones. Restore the missing lines from a known-good revision before building.
*/
char *U_Utf32leToUtf8(
){
/* only the skeleton of the dst != NULL path is present */
if(dst){
}
else {
}
/* len is tested before use, so it is presumably an optional output -- TODO confirm */
else if(len){
}
}
}
/* dst2 is the value handed back to the caller; its assignment is not present here */
return(dst2);
}
/**
\brief Convert a UTF-8 string to a UTF16-LE string.
\return pointer to new string or NULL if it fails
\param src UTF8 string to convert
\param max number of characters to convert, if 0, until terminator
\param len number of characters in new string, NOT including terminator
\note NOTE(review): the definition below is incomplete as it stands in this file.
The return type and function name are absent, src is the only parameter
present, nothing declares dst, and the closing braces do not pair up with the
opening ones. Restore the missing lines from a known-good revision before building.
*/
const char *src,
){
/* only the skeleton of the dst != NULL path is present */
if(dst){
}
else {
}
/* len is tested before use, so it is presumably an optional output -- TODO confirm */
else if(len){
}
}
}
}
/**
\brief Convert a UTF16LE string to a UTF8 string.
\return pointer to new UTF8 string or NULL if it fails
\param src UTF16LE string to convert
\param max number of characters to convert, if 0, until terminator
\param len number of characters in new string, NOT including terminator
\note NOTE(review): the definition below is incomplete as it stands in this file.
The parameter list is empty although src, max and len are documented, nothing
declares dst or dst2, the sizing statement that the "worst case" remark refers
to is absent, and the closing braces do not pair up with the opening ones.
Restore the missing lines from a known-good revision before building.
*/
char *U_Utf16leToUtf8(
){
// worst case is all glyphs (==max) need 4 UTF-8 encoded bytes + terminator.
/* only the skeleton of the dst != NULL path is present */
if(dst){
}
else {
}
/* len is tested before use, so it is presumably an optional output -- TODO confirm */
else if(len){
}
}
}
/* dst2 is the value handed back to the caller; its assignment is not present here */
return(dst2);
}
/**
\brief Convert a UTF16LE string to a LATIN1 string.
\return pointer to new Latin1 string or NULL if it fails
\param src UTF16LE string to convert
\param max number of characters to convert, if 0, until terminator
\param len number of characters in new string, NOT including terminator
\note NOTE(review): the definition below is incomplete as it stands in this file.
The parameter list is empty although src, max and len are documented, nothing
declares dst or dst2, and the closing braces do not pair up with the opening
ones. Restore the missing lines from a known-good revision before building.
*/
char *U_Utf16leToLatin1(
){
/* only the skeleton of the dst != NULL path is present */
if(dst){
}
else {
}
/* len is tested before use, so it is presumably an optional output -- TODO confirm */
else if(len){
}
}
}
/* dst2 is the value handed back to the caller; its assignment is not present here */
return(dst2);
}
/**
\brief Put a single 16 bit character into UTF-16LE form.
Used in conjunction with U_Utf16leEdit(), because the character
representation would otherwise be dependent on machine Endianness.

When U_BYTE_SWAP is nonzero the two bytes of the value are exchanged,
otherwise the value is returned unchanged.
\return UTF16LE representation of the character.
\param src 16 bit character

NOTE(review): the function header and the statement inside the U_BYTE_SWAP
conditional were missing from this definition. The name follows the
U_Utf16le* family used in this file and the swap is done inline; confirm both
against uemf_utf.h and the project's byte swapping helpers.
*/
uint16_t U_Utf16le(const uint16_t src){
   uint16_t dst = src;
#if U_BYTE_SWAP
   /* exchange high and low byte; the cast drops the bits promoted past 16 */
   dst = (uint16_t)((dst << 8) | (dst >> 8));
#endif
   return(dst);
}
/**
\brief Convert a UTF8 string to a Latin1 string.
\return pointer to new string or NULL if it fails
\param src UTF8 string to convert
\param max number of characters to convert, if 0, until terminator
\param len number of characters in new string, NOT including terminator
WMF uses latin1, others UTF-8, only some utf-8 can be converted to latin1.
\note NOTE(review): the definition below is incomplete as it stands in this file.
Only the src parameter is present although max and len are documented, nothing
declares dst or dst2, the converter opened below is never used or closed in the
lines that are present, and the closing braces do not pair up with the opening
ones. Restore the missing lines from a known-good revision before building.
*/
char *U_Utf8ToLatin1(
const char *src,
){
/* only the skeleton of the dst != NULL path is present */
if(dst){
iconv_t conv = iconv_open("LATIN1//TRANSLIT", "UTF-8"); // translate what can be, fill in with something close for the rest
}
else {
}
/* len is tested before use, so it is presumably an optional output -- TODO confirm */
else if(len){
}
}
}
/* dst2 is the value handed back to the caller; its assignment is not present here */
return((char *) dst2);
}
/**
\brief Convert a Latin1 string to a UTF8 string.
\return pointer to new string or NULL if it fails
\param src Latin1 string to convert
\param max number of characters to convert, if 0, until terminator
\param len number of characters in new string, NOT including terminator
WMF uses latin1, others UTF-8, all Latin1 should be able to convert to utf-8.
\note NOTE(review): the definition below is incomplete as it stands in this file.
Only the src parameter is present although max and len are documented, nothing
declares or sets srclen, dstlen, dst or dst2, and the closing braces do not pair
up with the opening ones. Restore the missing lines from a known-good revision
before building.
*/
char *U_Latin1ToUtf8(
const char *src,
){
/* output size in bytes: two per input byte plus one, per the remark on the line below */
dstlen = (1 + 2*srclen); // This should always work because all latin1 convert to 1 or 2 byte UTF8, it might waste some space
/* only the skeleton of the dst != NULL path is present */
if(dst){
}
else {
}
/* len is tested before use, so it is presumably an optional output -- TODO confirm */
else if(len){
}
}
}
/* dst2 is the value handed back to the caller; its assignment is not present here */
return((char *) dst2);
}
/**
\brief Single character replacement in a UTF-16LE string.
Used solely for the Description field which contains
embedded nulls, which makes it difficult to manipulate. Use some other character and then swap it.

The string is edited in place: every 16 bit unit equal to find is overwritten
with replace, up to (not including) the first zero unit. The comparison is on
the raw stored value, so find and replace must already be in UTF-16LE form.
\return number of substitutions, or -1 if src is not defined
\param src UTF16LE string to edit
\param find character to replace
\param replace substitute character
*/
int U_Utf16leEdit(
      uint16_t *src,
      uint16_t  find,
      uint16_t  replace
   ){
   int count=0;
   if(!src)return(-1);
   for(; *src; src++){
      if(*src == find){
         *src = replace;
         count++;
      }
   }
   return(count);
}
/**
\brief strdup for when strict C99 compliance is enforced
\returns duplicate string or NULL on error

The duplicate is obtained from malloc(); release it with free(). NULL is
returned both when s is NULL and when the allocation fails.
\param s string to duplicate (may be NULL)
*/
char *U_strdup(const char *s){
   char *news = NULL;
   if(s){
      size_t slen = strlen(s) + 1;   /* include the terminator */
      /* the cast keeps this valid if the file is built as C++ (see the extern "C" guard) */
      news = (char *) malloc(slen);
      if(news){
         memcpy(news, s, slen);
      }
   }
   return(news);
}
#ifdef __cplusplus
}
#endif