Lists

Terms and Conditions
Lists hosted on this site
Email the Postmaster
Tips for posting to public mailing lists

iconv (libiconv.dylib) broken

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

iconv (libiconv.dylib) broken

Subject: iconv (libiconv.dylib) broken
From: Andreas Grosam <email@hidden>
Date: Tue, 9 Feb 2010 20:58:11 +0100

Hi All,

after experimenting with the iconv library it seems that it is broken on Mac OS X.
<http://www.gnu.org/software/libiconv/>

The library provides functions for character encoding conversion. The library is installed on Mac OS X by default, but is not normally used in Cocoa applications. So, this may be of interest only for sources which get ported over to Mac OS X or any application or library linking against libiconv.dylib.

I would appreciate it if somebody could confirm this.

Below is a test program (console) which shows my findings.
(In order to compile, you need to link against libiconv.dylib. There is also a dependency on Boost, but this can easily be eliminated.)


Regards,

Andreas



// File: main.cpp
// preferred editor encoding: UTF-8

//
// iconv test
//


#include <errno.h>
#include <iconv.h>

#include <cassert>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/format.hpp>


/// Converts a byte sequence from one character encoding into a std::wstring via iconv.
///
/// @param in_buffer    First byte of the encoded input; it need not be NUL-terminated.
/// @param in_size      Number of bytes to read from in_buffer.
/// @param fromCharset  iconv name of the input encoding, e.g. "UTF-8".
/// @param toCharset    iconv name of the output encoding; defaults to "WCHAR_T".
/// @return The converted output, reinterpreted as a sequence of wchar_t values.
/// @throws std::runtime_error if the requested conversion is not available, if iconv
///         reports an error while converting or flushing, or if the descriptor cannot
///         be closed.
///
/// As a diagnostic aid (the `#if 1` section) the input bytes, the raw output bytes and
/// the resulting wchar_t values are dumped in hex to std::cout.
std::wstring convertToWstring(const char*   in_buffer,
                              size_t        in_size,
                              const char*   fromCharset,
                              const char*   toCharset = "WCHAR_T")
{
    // Owns the conversion descriptor so that it is closed on every exit path,
    // including exceptions thrown by the buffer allocation below.
    struct IconvGuard {
        iconv_t handle;
        bool    open;
        explicit IconvGuard(iconv_t h) : handle(h), open(true) {}
        ~IconvGuard() { if (open) iconv_close(handle); }
        // Explicit close for the success path, where the result must be checked.
        int close() { open = false; return iconv_close(handle); }
    };

    iconv_t cd = iconv_open(toCharset, fromCharset);
    if (cd == (iconv_t) -1)
    {
        // Capture errno before any other library call can overwrite it.
        const int err = errno;
        if (err == EINVAL) {
            std::string message("conversion from '");
            message += fromCharset;
            message += "' to ";
            message += toCharset;
            message += " not available";
            throw std::runtime_error(message);
        }
        throw std::runtime_error(std::strerror(err));
    }
    IconvGuard guard(cd);

    // Output buffer: one wchar_t per input byte is assumed to be the worst case.
    // A vector<wchar_t> (instead of auto_ptr<char> over new char[], which would
    // release the array with the wrong form of delete) gives correctly aligned,
    // zero-initialised storage that is freed automatically. The extra element keeps
    // the buffer non-empty for in_size == 0 and leaves headroom for converters that
    // emit a leading byte-order mark.
    std::vector<wchar_t> wbuffer(in_size + 1);
    const size_t wbuffer_size = wbuffer.size() * sizeof(wchar_t);
    char* const  out_begin = reinterpret_cast<char*>(&wbuffer[0]);

    char*   in_ptr = const_cast<char*>(in_buffer); // work around the unfortunate C declaration of iconv
    size_t  in_buffer_bytes_left = in_size;
    char*   out_ptr = out_begin;
    size_t  out_buffer_bytes_left = wbuffer_size;

    // Convert the character sequence according to the encodings:
    if (iconv(cd, &in_ptr, &in_buffer_bytes_left, &out_ptr, &out_buffer_bytes_left) == (size_t) -1) {
        const int err = errno;
        throw std::runtime_error(std::strerror(err));
    }

    // Flush any pending shift state / characters to the output buffer:
    if (iconv(cd, NULL, NULL, &out_ptr, &out_buffer_bytes_left) == (size_t) -1) {
        const int err = errno;
        throw std::runtime_error(std::strerror(err));
    }

    // Close the handle and report a failure to do so:
    if (guard.close() != 0) {
        const int err = errno;
        throw std::runtime_error(std::strerror(err));
    }

    const size_t ob_size = static_cast<size_t>(out_ptr - out_begin);
    const size_t nelem = ob_size / sizeof(wchar_t);

    // Sanity checks:
    assert(ob_size == wbuffer_size - out_buffer_bytes_left);
    assert(in_buffer_bytes_left == 0);
    assert(in_ptr == in_buffer + in_size);
    assert(nelem * sizeof(wchar_t) == ob_size);


#if 1
    // So, print out what's going on. The stream's formatting state is saved and
    // restored so the sticky hex/fill settings do not leak into the caller's output.
    const std::ios_base::fmtflags saved_flags = std::cout.flags();
    const char                    saved_fill = std::cout.fill();

    std::cout << "--- iconv start ---" << std::endl;
    std::cout << "   in buffer: ";
    for (size_t i = 0; i < in_size; ++i) {
        const unsigned int c = static_cast<unsigned char>(in_buffer[i]);
        std::cout << std::setw(2) << std::setfill('0') << std::hex << c << " ";
    }
    std::cout << std::endl;

    std::cout << "  out buffer: ";
    for (size_t i = 0; i < ob_size; ++i) {
        const unsigned int c = static_cast<unsigned char>(out_begin[i]);
        std::cout << std::setw(2) << std::setfill('0') << std::hex << c << " ";
    }
    std::cout << std::endl;

    std::cout << "wchar buffer: ";
    for (size_t i = 0; i < nelem; ++i) {
        // Print the numeric value explicitly; a narrow stream has no character
        // output for wchar_t.
        std::cout << std::setw(static_cast<int>(sizeof(wchar_t) * 2)) << std::setfill('0') << std::hex
                  << static_cast<unsigned long>(wbuffer[i]) << " ";
    }
    std::cout << std::endl;
    std::cout << "--- iconv end ---" << std::endl;

    std::cout.flags(saved_flags);
    std::cout.fill(saved_fill);
#endif

    return std::wstring(&wbuffer[0], nelem);
}


/// Compares a conversion result against the expected string and reports a mismatch.
///
/// @param oughtTo  NUL-terminated expected wide string.
/// @param test     The string actually produced.
///
/// Prints nothing when both are equal. Otherwise writes "conversion failed:" followed
/// by both strings to std::cout, each element as a zero-padded hexadecimal number
/// (two digits per byte of wchar_t) followed by a space.
void check(const wchar_t* oughtTo, const std::wstring& test)
{
    const std::wstring expected(oughtTo);
    if (expected == test) {
        return;
    }

    const int digits = static_cast<int>(sizeof(wchar_t) * 2);

    // Save the stream's formatting state; hex and the fill character are sticky and
    // would otherwise leak into whatever the caller prints next.
    const std::ios_base::fmtflags saved_flags = std::cout.flags();
    const char                    saved_fill = std::cout.fill();

    std::cout << "conversion failed:" << std::endl;

    const char* const   labels[2] = { "ought to: ", "test:     " };
    const std::wstring* rows[2]   = { &expected, &test };
    for (int row = 0; row < 2; ++row) {
        std::cout << labels[row];
        const std::wstring& text = *rows[row];
        for (std::wstring::size_type i = 0; i < text.size(); ++i) {
            // Print the numeric value explicitly; a narrow stream has no character
            // output for wchar_t.
            std::cout << std::setw(digits) << std::setfill('0') << std::hex
                      << static_cast<unsigned long>(text[i]) << " ";
        }
        std::cout << std::endl;
    }

    std::cout.flags(saved_flags);
    std::cout.fill(saved_fill);
}


int main (int argc, const char * argv[])
{
    try {
        // Test editor encoding:
        std::wstring wStr = L"TüT";
        std::wstring wcheck = L"T\u00fcT"; // <http://www.fileformat.info/info/unicode/char/00fc/index.htm>
        if (wStr != wcheck) {
            std::cout << "error:  bogus string representation" << std::endl;
            return -1;
        }

        std::string utf8Str = "TüT";
        std::string utf8Check = "T\xC3\xBCT"; // <http://www.fileformat.info/info/unicode/char/00fc/index.htm>
        if (utf8Str != utf8Check) {
            std::cout << "error:  bogus string representation" << std::endl;
            return -1;
        }


        // Test iconv
        std::wstring wtest;

        wtest= convertToWstring("T\xC3\xBCT", 4, "UTF-8", "WCHAR_T"); // "TüT"
        check(L"T\u00fcT", wtest);

        wtest = convertToWstring("T\xC3\x83T", 4, "UTF-8", "WCHAR_T");  // "TÃT"
        check(L"T\u00c3T", wtest);

        wtest = convertToWstring("T\xC3\x84T", 4, "UTF-8", "WCHAR_T");  // "TÄT"
        check(L"T\u00c4T", wtest);

        wtest = convertToWstring("T\xC3\x85T", 4, "UTF-8", "WCHAR_T");  // "TÅT"
        check(L"T\u00c5T", wtest);

        /*
        wtest = convertToWstring("T\xC3\xBCT", 4, "UTF-8", "UTF-32"); // "TüT"
        check(L"T\u00fcT", wtest);

        wtest = convertToWstring("T\xC3\xB3T", 4, "UTF-8", "UTF-32"); // "TÃT"
        check(L"T\u00c3T", wtest);
	*/

    }
    catch (std::exception& ex) {
        std::cout << ex.what() << std::endl;
        return -1;
    }

    return 0;
}

 _______________________________________________
Do not post admin requests to the list. They will be ignored.
Xcode-users mailing list      (email@hidden)
Help/Unsubscribe/Update your Subscription:

This email sent to email@hidden

Follow-Ups:
- Re: iconv (libiconv.dylib) broken
  - From: Jonas Maebe <email@hidden>

Prev by Date: Problems with Organizer
Next by Date: Re: iconv (libiconv.dylib) broken
Previous by thread: Problems with Organizer
Next by thread: Re: iconv (libiconv.dylib) broken
Index(es):
- Date
- Thread