WIP

2016-10-31 09:14:27 -07:00 · 2016-10-31 09:14:27 -07:00 · 0d87193cb1
commit 0d87193cb1
parent adde6c296c
1 changed files with 88 additions and 20 deletions
--- a/main.c
+++ b/main.c
@ -19,6 +19,7 @@
 #include <stdio.h>
 #include <iconv.h>
 #include <string.h>
 #include <errno.h>
 #include <stdlib.h>
 #include "eb/eb/eb.h"
@ -73,29 +74,95 @@ void array_free(Array* arr) {
    Local functions
 */
-static const char * eucjp_to_utf8(const char src[]) {
+// From https://stackoverflow.com/questions/2162390/iconv-encoding-conversion-problem
    char src_buff[MAX_ENTRY_TEXT];
    size_t src_size = strlen(src);
    char * src_ptr = src_buff;
    memcpy(src_buff, src, src_size + 1);
-    static char dst_buff[MAX_ENTRY_TEXT];
+static char * convert (const char *from_charset, const char *to_charset, const char *input) {
-    size_t dst_size = MAX_ENTRY_TEXT;
+    size_t inleft, outleft, converted = 0;
-    char * dst_ptr = dst_buff;
+    char *output, *outbuf, *tmp;
-    *dst_buff = 0;
+    const char *inbuf;
    size_t outlen;
    iconv_t cd;
-    iconv_t conv = iconv_open("UTF-8", "EUC-JP");
+    if ((cd = iconv_open (to_charset, from_charset)) == (iconv_t) -1)
-    if (conv != (iconv_t)-1) {
+        return NULL;
-        if (iconv(conv, &src_ptr, &src_size, &dst_ptr, &dst_size) == (size_t)-1) {
+
-            printf("error");
+    inleft = strlen (input);
    inbuf = input;
    /* we'll start off allocating an output buffer which is the same size
     * as our input buffer. */
    outlen = inleft;
    /* we allocate 4 bytes more than what we need for nul-termination... */
    if (!(output = malloc (outlen + 4))) {
        iconv_close (cd);
        return NULL;
    }
    do {
        errno = 0;
        outbuf = output + converted;
        outleft = outlen - converted;
        converted = iconv (cd, (char **) &inbuf, &inleft, &outbuf, &outleft);
        if (converted != (size_t) -1 || errno == EINVAL) {
            /*
             * EINVAL  An  incomplete  multibyte sequence has been encoun-
             *         tered in the input.
             *
             * We'll just truncate it and ignore it.
             */
            break;
        }
    }
    else {
        printf("poop");
    }
-    iconv_close(conv);
+        if (errno != E2BIG) {
-    return dst_buff;
+            /*
             * EILSEQ An invalid multibyte sequence has been  encountered
             *        in the input.
             *
             * Bad input, we can't really recover from this. 
             */
            iconv_close (cd);
            free (output);
            return NULL;
        }
        /*
         * E2BIG   There is not sufficient room at *outbuf.
         *
         * We just need to grow our outbuffer and try again.
         */
        converted = outbuf - output;
        outlen += inleft * 2 + 8;
        if (!(tmp = realloc (output, outlen + 4))) {
            iconv_close (cd);
            free (output);
            return NULL;
        }
        output = tmp;
        outbuf = output + converted;
    } while (1);
    /* flush the iconv conversion */
    iconv (cd, NULL, NULL, &outbuf, &outleft);
    iconv_close (cd);
    /* Note: not all charsets can be nul-terminated with a single
     * nul byte. UCS2, for example, needs 2 nul bytes and UCS4
     * needs 4. I hope that 4 nul bytes is enough to terminate all
     * multibyte charsets? */
    /* nul-terminate the string */
    memset (outbuf, 0, 4);
    return output;
 }
 static const char * eucjp_to_utf8(const char src[]) {
    return convert("EUC-JP", "UTF-8", src);
 }
 static void dump_hits(EB_Book* book) {
@ -116,7 +183,8 @@ static void dump_hits(EB_Book* book) {
            eb_read_text(book, NULL, NULL, NULL, MAX_ENTRY_TEXT, text, &text_length);
            const char* text_utf8 = eucjp_to_utf8(text);
-            (void) text_utf8;
+            puts(text_utf8);
            /* (void) text_utf8; */
        }
    }
    while (hit_count > 0);