diff --git a/convert.c b/convert.c new file mode 100644 index 0000000..1f5933a --- /dev/null +++ b/convert.c @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2016 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +/* Reference: https://stackoverflow.com/questions/2162390/iconv-encoding-conversion-problem */ + +static char * convert (const char *from_charset, const char *to_charset, const char *input) { + size_t inleft, outleft, converted = 0; + char *output, *outbuf, *tmp; + const char *inbuf; + size_t outlen; + iconv_t cd; + + if ((cd = iconv_open (to_charset, from_charset)) == (iconv_t) -1) + return NULL; + + inleft = strlen (input); + inbuf = input; + + /* we'll start off allocating an output buffer which is the same size + * as our input buffer. */ + outlen = inleft; + + /* we allocate 4 bytes more than what we need for nul-termination... */ + if (!(output = malloc (outlen + 4))) { + iconv_close (cd); + return NULL; + } + + do { + errno = 0; + outbuf = output + converted; + outleft = outlen - converted; + + converted = iconv (cd, (char **) &inbuf, &inleft, &outbuf, &outleft); + if (converted != (size_t) -1 || errno == EINVAL) { + /* + * EINVAL An incomplete multibyte sequence has been encounĀ­- + * tered in the input. + * + * We'll just truncate it and ignore it. + */ + break; + } + + if (errno != E2BIG) { + /* + * EILSEQ An invalid multibyte sequence has been encountered + * in the input. + * + * Bad input, we can't really recover from this. + */ + iconv_close (cd); + free (output); + return NULL; + } + + /* + * E2BIG There is not sufficient room at *outbuf. + * + * We just need to grow our outbuffer and try again. + */ + + converted = outbuf - output; + outlen += inleft * 2 + 8; + + if (!(tmp = realloc (output, outlen + 4))) { + iconv_close (cd); + free (output); + return NULL; + } + + output = tmp; + outbuf = output + converted; + } while (1); + + /* flush the iconv conversion */ + iconv (cd, NULL, NULL, &outbuf, &outleft); + iconv_close (cd); + + /* Note: not all charsets can be nul-terminated with a single + * nul byte. UCS2, for example, needs 2 nul bytes and UCS4 + * needs 4. I hope that 4 nul bytes is enough to terminate all + * multibyte charsets? */ + + /* nul-terminate the string */ + memset (outbuf, 0, 4); + + return output; +} + +const char * eucjp_to_utf8(const char src[]) { + return convert("EUC-JP", "UTF-8", src); +} diff --git a/convert.h b/convert.h new file mode 100644 index 0000000..c3dda7c --- /dev/null +++ b/convert.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2016 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef CONVERT_H +#define CONVERT_H + +const char * eucjp_to_utf8(const char src[]); + +#endif diff --git a/main.c b/main.c index 8ddd5cb..082633c 100644 --- a/main.c +++ b/main.c @@ -17,154 +17,18 @@ */ #include -#include -#include -#include -#include + +#include "convert.h" +#include "util.h" #include "eb/eb/eb.h" #include "eb/eb/error.h" #include "eb/eb/text.h" -/* - Constants -*/ - #define MAX_ENTRY_HITS 128 #define MAX_ENTRY_TEXT 1024 #define MAX_ENTRY_HEADER 512 -/* - Array functions -*/ - -typedef struct { - char header[MAX_ENTRY_HEADER]; - char text[MAX_ENTRY_TEXT]; -} Entry; - -typedef struct { - Entry* ptr; - size_t used; - size_t size; -} Array; - -void array_init(Array* arr, size_t init_size) { - arr->ptr = (Entry *)malloc(init_size * sizeof(Entry)); - arr->used = 0; - arr->size = init_size; -} - -Entry* array_new(Array* arr) { - if (arr->used == arr->size) { - arr->size *= 2; - arr->ptr = (Entry *)realloc(arr->ptr, arr->size * sizeof(Entry)); - } - - return &arr->ptr[arr->used++]; -} - -void array_free(Array* arr) { - free(arr->ptr); - arr->ptr = NULL; - arr->used = arr->size = 0; -} - -/* - Local functions -*/ - -// From https://stackoverflow.com/questions/2162390/iconv-encoding-conversion-problem - -static char * convert (const char *from_charset, const char *to_charset, const char *input) { - size_t inleft, outleft, converted = 0; - char *output, *outbuf, *tmp; - const char *inbuf; - size_t outlen; - iconv_t cd; - - if ((cd = iconv_open (to_charset, from_charset)) == (iconv_t) -1) - return NULL; - - inleft = strlen (input); - inbuf = input; - - /* we'll start off allocating an output buffer which is the same size - * as our input buffer. */ - outlen = inleft; - - /* we allocate 4 bytes more than what we need for nul-termination... */ - if (!(output = malloc (outlen + 4))) { - iconv_close (cd); - return NULL; - } - - do { - errno = 0; - outbuf = output + converted; - outleft = outlen - converted; - - converted = iconv (cd, (char **) &inbuf, &inleft, &outbuf, &outleft); - if (converted != (size_t) -1 || errno == EINVAL) { - /* - * EINVAL An incomplete multibyte sequence has been encounĀ­- - * tered in the input. - * - * We'll just truncate it and ignore it. - */ - break; - } - - if (errno != E2BIG) { - /* - * EILSEQ An invalid multibyte sequence has been encountered - * in the input. - * - * Bad input, we can't really recover from this. - */ - iconv_close (cd); - free (output); - return NULL; - } - - /* - * E2BIG There is not sufficient room at *outbuf. - * - * We just need to grow our outbuffer and try again. - */ - - converted = outbuf - output; - outlen += inleft * 2 + 8; - - if (!(tmp = realloc (output, outlen + 4))) { - iconv_close (cd); - free (output); - return NULL; - } - - output = tmp; - outbuf = output + converted; - } while (1); - - /* flush the iconv conversion */ - iconv (cd, NULL, NULL, &outbuf, &outleft); - iconv_close (cd); - - /* Note: not all charsets can be nul-terminated with a single - * nul byte. UCS2, for example, needs 2 nul bytes and UCS4 - * needs 4. I hope that 4 nul bytes is enough to terminate all - * multibyte charsets? */ - - /* nul-terminate the string */ - memset (outbuf, 0, 4); - - return output; -} - -static const char * eucjp_to_utf8(const char src[]) { - return convert("EUC-JP", "UTF-8", src); -} - static void dump_hits(EB_Book* book) { EB_Hit hits[MAX_ENTRY_HITS]; int hit_count = 0; @@ -183,7 +47,10 @@ static void dump_hits(EB_Book* book) { eb_read_text(book, NULL, NULL, NULL, MAX_ENTRY_TEXT, text, &text_length); const char* text_utf8 = eucjp_to_utf8(text); - puts(text_utf8); + if (text_utf8 != NULL) { + puts(text_utf8); + } + /* (void) text_utf8; */ } } @@ -252,10 +119,6 @@ static int process(const char path[]) { return 0; } -/* - Entry point -*/ - int main(int argc, char *argv[]) { if (argc != 2) { fprintf(stderr, "usage: %s path\n", argv[0]); diff --git a/util.c b/util.c new file mode 100644 index 0000000..dce901d --- /dev/null +++ b/util.c @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2016 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "util.h" + +void array_init(Array* arr, size_t init_size) { + arr->ptr = (Entry *)malloc(init_size * sizeof(Entry)); + arr->used = 0; + arr->size = init_size; +} + +Entry* array_new(Array* arr) { + if (arr->used == arr->size) { + arr->size *= 2; + arr->ptr = (Entry *)realloc(arr->ptr, arr->size * sizeof(Entry)); + } + + return &arr->ptr[arr->used++]; +} + +void array_free(Array* arr) { + free(arr->ptr); + arr->ptr = NULL; + arr->used = arr->size = 0; +} diff --git a/util.h b/util.h new file mode 100644 index 0000000..da81ffe --- /dev/null +++ b/util.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2016 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef UTIL_H +#define UTIL_H + +#include + +typedef struct { + char* header; + char* text; +} Entry; + +typedef struct { + Entry* ptr; + size_t used; + size_t size; +} Array; + +void array_init(Array* arr, size_t init_size); +Entry* array_new(Array* arr); +void array_free(Array* arr); + +#endif