zero-epwing-go/setword.c

/*
 * Copyright (c) 1997-2006  Motoyuki Kasahara
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "build-pre.h"
#include "eb.h"
#include "error.h"
#include "build-post.h"

/*
 * Unexported functions.
 *
 * Forward declarations of the file-local helpers, so that the exported
 * eb_set_*() entry points can be defined before them.
 */

/* Applies the per-index canonicalization rules (calls the helpers below). */
static void eb_fix_word(EB_Book *book, const EB_Search *search, char *word,
    char *canonicalized_word);

/* Input converters; one of them is chosen from `book->character_code'. */
static EB_Error_Code eb_convert_latin(EB_Book *book, const char *input_word,
    char *word, EB_Word_Code *word_code);
static EB_Error_Code eb_convert_euc_jp(EB_Book *book, const char *input_word,
    char *word, EB_Word_Code *word_code);

/* In-place canonicalization steps invoked from eb_fix_word(). */
static void eb_convert_katakana_jis(char *word);
static void eb_convert_hiragana_jis(char *word);
static void eb_convert_lower_latin(char *word);
static void eb_convert_lower_jis(char *word);
static void eb_delete_marks_jis(char *word);
static void eb_convert_long_vowels_jis(char *word);
static void eb_delete_long_vowels_jis(char *word);
static void eb_convert_double_consonants_jis(char *word);
static void eb_convert_contracted_sounds_jis(char *word);
static void eb_convert_small_vowels_jis(char *word);
static void eb_convert_voiced_consonants_jis(char *word);
static void eb_convert_p_sounds_jis(char *word);
static void eb_delete_spaces_latin(char *word);
static void eb_delete_spaces_jis(char *word);

/* Word reversal, used by eb_set_endword(). */
static void eb_reverse_word_latin(char *word);
static void eb_reverse_word_jis(char *word);


/*
 * Make a fixed word and a canonicalized word for `WORD SEARCH'.
 *
 * `input_word' is converted into `word' with the converter that matches
 * the character code of `book', copied to `canonicalized_word', and both
 * are then passed to eb_fix_word() together with the selected index.
 *
 * On success, EB_SUCCESS is returned and `*word_code' is the kind of the
 * word as set by the converter.
 * On failure, an error code is returned, `word' and `canonicalized_word'
 * are set to empty strings and `*word_code' is set to EB_WORD_INVALID.
 */
EB_Error_Code
eb_set_word(EB_Book *book, const char *input_word, char *word,
    char *canonicalized_word, EB_Word_Code *word_code)
{
    const EB_Search *search;
    EB_Error_Code error_code;

    LOG(("in: eb_set_word(book=%d, input_word=%s)", (int)book->code,
        eb_quoted_string(input_word)));

    /*
     * Convert `input_word' according to the character code of the book.
     */
    error_code = (book->character_code == EB_CHARCODE_ISO8859_1)
        ? eb_convert_latin(book, input_word, word, word_code)
        : eb_convert_euc_jp(book, input_word, word, word_code);
    if (error_code != EB_SUCCESS) {
        goto failed;
    }
    strcpy(canonicalized_word, word);

    /*
     * Pick the index preferred for this kind of word.
     */
    switch (*word_code) {
    case EB_WORD_ALPHABET:
        search = &book->subbook_current->word_alphabet;
        break;
    case EB_WORD_KANA:
        search = &book->subbook_current->word_kana;
        break;
    case EB_WORD_OTHER:
        search = &book->subbook_current->word_asis;
        break;
    default:
        error_code = EB_ERR_NO_SUCH_SEARCH;
        goto failed;
    }

    /*
     * When the preferred index is absent, fall back to the `asis' index.
     * If that one is absent too, the search is not available.
     */
    if (search->start_page == 0) {
        search = &book->subbook_current->word_asis;
        if (search->start_page == 0) {
            error_code = EB_ERR_NO_SUCH_SEARCH;
            goto failed;
        }
    }

    /*
     * Fix the word.
     */
    eb_fix_word(book, search, word, canonicalized_word);

    LOG(("out: eb_set_word(word=%s, canonicalized_word=%s, word_code=%d) = %s",
        eb_quoted_string(word), eb_quoted_string(canonicalized_word),
        (int)*word_code, eb_error_string(EB_SUCCESS)));

    return EB_SUCCESS;

    /*
     * An error occurs...
     */
  failed:
    *word_code = EB_WORD_INVALID;
    *canonicalized_word = '\0';
    *word = '\0';
    LOG(("out: eb_set_word() = %s", eb_error_string(error_code)));
    return error_code;
}


/*
 * Make a fixed word and a canonicalized word for `ENDWORD SEARCH'.
 *
 * Works like the word-search variant but selects among the `endword'
 * indexes and finally reverses both `word' and `canonicalized_word'.
 *
 * On success, EB_SUCCESS is returned and `*word_code' is the kind of the
 * word as set by the converter.
 * On failure, an error code is returned, `word' and `canonicalized_word'
 * are set to empty strings and `*word_code' is set to EB_WORD_INVALID.
 */
EB_Error_Code
eb_set_endword(EB_Book *book, const char *input_word, char *word,
    char *canonicalized_word, EB_Word_Code *word_code)
{
    const EB_Search *search;
    EB_Error_Code error_code;

    LOG(("in: eb_set_endword(book=%d, input_word=%s)", (int)book->code,
        eb_quoted_string(input_word)));

    /*
     * Convert `input_word' according to the character code of the book.
     */
    error_code = (book->character_code == EB_CHARCODE_ISO8859_1)
        ? eb_convert_latin(book, input_word, word, word_code)
        : eb_convert_euc_jp(book, input_word, word, word_code);
    if (error_code != EB_SUCCESS) {
        goto failed;
    }
    strcpy(canonicalized_word, word);

    /*
     * Pick the index preferred for this kind of word.
     */
    switch (*word_code) {
    case EB_WORD_ALPHABET:
        search = &book->subbook_current->endword_alphabet;
        break;
    case EB_WORD_KANA:
        search = &book->subbook_current->endword_kana;
        break;
    case EB_WORD_OTHER:
        search = &book->subbook_current->endword_asis;
        break;
    default:
        error_code = EB_ERR_NO_SUCH_SEARCH;
        goto failed;
    }

    /*
     * When the preferred index is absent, fall back to the `asis' index.
     * If that one is absent too, the search is not available.
     */
    if (search->start_page == 0) {
        search = &book->subbook_current->endword_asis;
        if (search->start_page == 0) {
            error_code = EB_ERR_NO_SUCH_SEARCH;
            goto failed;
        }
    }

    /*
     * Fix the word.
     */
    eb_fix_word(book, search, word, canonicalized_word);

    /*
     * Reverse both results with the routine matching the character code.
     */
    if (book->character_code != EB_CHARCODE_ISO8859_1) {
        eb_reverse_word_jis(word);
        eb_reverse_word_jis(canonicalized_word);
    } else {
        eb_reverse_word_latin(word);
        eb_reverse_word_latin(canonicalized_word);
    }

    LOG(("out: eb_set_endword(word=%s, canonicalized_word=%s, word_code=%d) \
= %s",
        eb_quoted_string(word), eb_quoted_string(canonicalized_word),
        (int)*word_code, eb_error_string(EB_SUCCESS)));

    return EB_SUCCESS;

    /*
     * An error occurs...
     */
  failed:
    *word_code = EB_WORD_INVALID;
    *canonicalized_word = '\0';
    *word = '\0';
    LOG(("out: eb_set_endword() = %s", eb_error_string(error_code)));
    return error_code;
}


/*
 * Make a fixed word and a canonicalized word for `KEYWORD SEARCH'
 * or `CROSS SEARCH'.
 *
 * The converted word is always fixed against the `keyword' index of the
 * current subbook.
 *
 * On success, EB_SUCCESS is returned and `*word_code' is the kind of the
 * word as set by the converter.
 * On failure, an error code is returned, `word' and `canonicalized_word'
 * are set to empty strings and `*word_code' is set to EB_WORD_INVALID.
 */
EB_Error_Code
eb_set_keyword(EB_Book *book, const char *input_word, char *word,
    char *canonicalized_word, EB_Word_Code *word_code)
{
    EB_Error_Code error_code;

    LOG(("in: eb_set_keyword(book=%d, input_word=%s)", (int)book->code,
        eb_quoted_string(input_word)));

    /*
     * Convert `input_word' according to the character code of the book.
     */
    error_code = (book->character_code == EB_CHARCODE_ISO8859_1)
        ? eb_convert_latin(book, input_word, word, word_code)
        : eb_convert_euc_jp(book, input_word, word, word_code);

    if (error_code != EB_SUCCESS) {
        /*
         * An error occurs...
         */
        *word_code = EB_WORD_INVALID;
        *canonicalized_word = '\0';
        *word = '\0';
        LOG(("out: eb_set_keyword() = %s", eb_error_string(error_code)));
        return error_code;
    }

    strcpy(canonicalized_word, word);

    /*
     * Fix the word.
     */
    eb_fix_word(book, &book->subbook_current->keyword, word,
        canonicalized_word);

    LOG(("out: eb_set_keyword(word=%s, canonicalized_word=%s, word_code=%d) \
= %s",
        eb_quoted_string(word), eb_quoted_string(canonicalized_word),
        (int)*word_code, eb_error_string(EB_SUCCESS)));

    return EB_SUCCESS;
}


/*
 * Make a fixed word and a canonicalized word for `MULTI SEARCH'.
 *
 * The converted word is fixed against entry `entry_id' of multi search
 * `multi_id' in the current subbook.  Neither index is range-checked
 * here.
 *
 * On success, EB_SUCCESS is returned and `*word_code' is the kind of the
 * word as set by the converter.
 * On failure, an error code is returned, `word' and `canonicalized_word'
 * are set to empty strings and `*word_code' is set to EB_WORD_INVALID.
 */
EB_Error_Code
eb_set_multiword(EB_Book *book, EB_Multi_Search_Code multi_id,
    EB_Multi_Entry_Code entry_id, const char *input_word, char *word,
    char *canonicalized_word, EB_Word_Code *word_code)
{
    EB_Error_Code error_code;

    LOG(("in: eb_set_multiword(book=%d, input_word=%s)", (int)book->code,
        eb_quoted_string(input_word)));

    /*
     * Convert `input_word' according to the character code of the book.
     */
    error_code = (book->character_code == EB_CHARCODE_ISO8859_1)
        ? eb_convert_latin(book, input_word, word, word_code)
        : eb_convert_euc_jp(book, input_word, word, word_code);

    if (error_code != EB_SUCCESS) {
        /*
         * An error occurs...
         */
        *word_code = EB_WORD_INVALID;
        *canonicalized_word = '\0';
        *word = '\0';
        LOG(("out: eb_set_multiword() = %s", eb_error_string(error_code)));
        return error_code;
    }

    strcpy(canonicalized_word, word);

    /*
     * Fix the word against the requested multi search entry.
     */
    eb_fix_word(book,
        &book->subbook_current->multis[multi_id].entries[entry_id],
        word, canonicalized_word);

    LOG(("out: eb_set_multiword(word=%s, canonicalized_word=%s, word_code=%d) \
= %s",
        eb_quoted_string(word), eb_quoted_string(canonicalized_word),
        (int)*word_code, eb_error_string(EB_SUCCESS)));

    return EB_SUCCESS;
}


/*
 * Fix `canonicalized_word' and `word' according to `book->character_code'
 * and the style flags of `search'.
 *
 * Each enabled style is applied to `canonicalized_word' in a fixed order.
 * Afterwards the result is copied back to `word', except for the indexes
 * with ID 0x70 and 0x90, for which `word' keeps its converted form.
 * An index with ID 0xa1 that has a candidates page leaves both strings
 * untouched.
 */
static void
eb_fix_word(EB_Book *book, const EB_Search *search, char *word,
    char *canonicalized_word)
{
    LOG(("in: eb_fix_word(book=%d, word=%s, canonicalized_word=%s)",
        (int)book->code, eb_quoted_string(word),
        eb_quoted_string(canonicalized_word)));

    /* Nothing to do for this kind of index (note: no `out' log here). */
    if (search->index_id == 0xa1 && search->candidates_page != 0) {
        return;
    }

    if (book->character_code != EB_CHARCODE_ISO8859_1) {
        /*
         * JIS X 0208 word.  The order of the steps below is significant.
         */
        if (search->space == EB_INDEX_STYLE_DELETE) {
            eb_delete_spaces_jis(canonicalized_word);
        }

        if (search->katakana == EB_INDEX_STYLE_CONVERT) {
            eb_convert_katakana_jis(canonicalized_word);
        } else if (search->katakana == EB_INDEX_STYLE_REVERSED_CONVERT) {
            eb_convert_hiragana_jis(canonicalized_word);
        }

        if (search->lower == EB_INDEX_STYLE_CONVERT) {
            eb_convert_lower_jis(canonicalized_word);
        }

        if (search->mark == EB_INDEX_STYLE_DELETE) {
            eb_delete_marks_jis(canonicalized_word);
        }

        if (search->long_vowel == EB_INDEX_STYLE_CONVERT) {
            eb_convert_long_vowels_jis(canonicalized_word);
        } else if (search->long_vowel == EB_INDEX_STYLE_DELETE) {
            eb_delete_long_vowels_jis(canonicalized_word);
        }

        if (search->double_consonant == EB_INDEX_STYLE_CONVERT) {
            eb_convert_double_consonants_jis(canonicalized_word);
        }

        if (search->contracted_sound == EB_INDEX_STYLE_CONVERT) {
            eb_convert_contracted_sounds_jis(canonicalized_word);
        }

        if (search->small_vowel == EB_INDEX_STYLE_CONVERT) {
            eb_convert_small_vowels_jis(canonicalized_word);
        }

        if (search->voiced_consonant == EB_INDEX_STYLE_CONVERT) {
            eb_convert_voiced_consonants_jis(canonicalized_word);
        }

        if (search->p_sound == EB_INDEX_STYLE_CONVERT) {
            eb_convert_p_sounds_jis(canonicalized_word);
        }
    } else {
        /*
         * ISO 8859-1 word: only the space and lower-case styles apply.
         */
        if (search->space == EB_INDEX_STYLE_DELETE) {
            eb_delete_spaces_latin(canonicalized_word);
        }

        if (search->lower == EB_INDEX_STYLE_CONVERT) {
            eb_convert_lower_latin(canonicalized_word);
        }
    }

    /* Indexes 0x70 and 0x90 keep the non-canonicalized `word'. */
    if (!(search->index_id == 0x70 || search->index_id == 0x90)) {
        strcpy(word, canonicalized_word);
    }

    LOG(("out: eb_fix_word(word=%s, canonicalized_word=%s)",
        eb_quoted_string(word), eb_quoted_string(canonicalized_word)));
}


/*
 * Convert `input_word' to ISO 8859-1 and put it into `word'.
 *
 * Leading and trailing spaces and tabs are dropped, tabs inside the word
 * become spaces, and each run of spaces/tabs is collapsed to one space.
 *
 * On success, EB_SUCCESS is returned and `*word_code' is set to
 * EB_WORD_ALPHABET.
 * If the result would be longer than EB_MAX_WORD_LENGTH bytes,
 * EB_ERR_TOO_LONG_WORD is returned; if it would be empty,
 * EB_ERR_EMPTY_WORD is returned.  In both failure cases `word' is set to
 * an empty string and `*word_code' to EB_WORD_INVALID.
 */
static EB_Error_Code
eb_convert_latin(EB_Book *book, const char *input_word, char *word,
    EB_Word_Code *word_code)
{
    EB_Error_Code error_code;
    unsigned char *wp = (unsigned char *) word;
    const unsigned char *inp = (const unsigned char *) input_word;
    const unsigned char *tail;
    unsigned char c1;
    int word_length = 0;

    LOG(("in: eb_convert_latin(book=%d, input_word=%s)", (int)book->code,
        eb_quoted_string(input_word)));

    /*
     * Find the tail of `input_word': one past the last byte that is not
     * a trailing space or tab.
     *
     * The scan starts at the terminating NUL and looks at the byte before
     * `tail', so no pointer in front of the string is ever formed, even
     * when `input_word' is empty or consists of blanks only.
     */
    tail = inp + strlen(input_word);
    while (inp < tail && (*(tail - 1) == ' ' || *(tail - 1) == '\t')) {
        tail--;
    }

    /*
     * Ignore spaces and tabs in the beginning of `input_word'.
     */
    while (*inp == ' ' || *inp == '\t') {
        inp++;
    }

    while (inp < tail) {
        /*
         * Check for the length of the word.
         * If exceeds, return with an error code.
         */
        if (EB_MAX_WORD_LENGTH < word_length + 1) {
            error_code = EB_ERR_TOO_LONG_WORD;
            goto failed;
        }

        c1 = *inp++;

        /*
         * Tabs are translated to spaces.
         */
        if (c1 == '\t') {
            c1 = ' ';
        }

        *wp++ = c1;

        /*
         * Skip successive spaces and tabs.  This cannot run past `tail'
         * because the byte just before `tail' is not a blank.
         */
        if (c1 == ' ') {
            while (*inp == '\t' || *inp == ' ') {
                inp++;
            }
        }

        word_length++;
    }
    *wp = '\0';

    if (word_length == 0) {
        error_code = EB_ERR_EMPTY_WORD;
        goto failed;
    }
    *word_code = EB_WORD_ALPHABET;

    LOG(("out: eb_convert_latin(word=%s, word_code=%d) = %s",
        eb_quoted_string(word), (int)*word_code, eb_error_string(EB_SUCCESS)));

    return EB_SUCCESS;

    /*
     * An error occurs...
     */
  failed:
    *word = '\0';
    *word_code = EB_WORD_INVALID;
    LOG(("out: eb_convert_latin() = %s", eb_error_string(error_code)));
    return error_code;
}


/*
 * Table used to convert ASCII to JIS X 0208.
 *
 * Indexed by (ASCII byte - 0x20) for the bytes 0x20..0x7e.  Each entry is
 * a 16-bit code: the upper 8 bits are the first byte and the lower 8 bits
 * the second byte of the JIS X 0208 character.
 */
static const unsigned int jisx0208_table[] = {
    /* 0x20 -- 0x2f */
    0x2121, 0x212a, 0x2149, 0x2174, 0x2170, 0x2173, 0x2175, 0x2147,
    0x214a, 0x214b, 0x2176, 0x215c, 0x2124, 0x215d, 0x2125, 0x213f,
    /* 0x30 -- 0x3f */
    0x2330, 0x2331, 0x2332, 0x2333, 0x2334, 0x2335, 0x2336, 0x2337,
    0x2338, 0x2339, 0x2127, 0x2128, 0x2163, 0x2161, 0x2164, 0x2129,
    /* 0x40 -- 0x4f */
    0x2177, 0x2341, 0x2342, 0x2343, 0x2344, 0x2345, 0x2346, 0x2347,
    0x2348, 0x2349, 0x234a, 0x234b, 0x234c, 0x234d, 0x234e, 0x234f,
    /* 0x50 -- 0x5f */
    0x2350, 0x2351, 0x2352, 0x2353, 0x2354, 0x2355, 0x2356, 0x2357,
    0x2358, 0x2359, 0x235a, 0x214e, 0x2140, 0x214f, 0x2130, 0x2132,
    /* 0x60 -- 0x6f */
    0x2146, 0x2361, 0x2362, 0x2363, 0x2364, 0x2365, 0x2366, 0x2367,
    0x2368, 0x2369, 0x236a, 0x236b, 0x236c, 0x236d, 0x236e, 0x236f,
    /* 0x70 -- 0x7e */
    0x2370, 0x2371, 0x2372, 0x2373, 0x2374, 0x2375, 0x2376, 0x2377,
    0x2378, 0x2379, 0x237a, 0x2150, 0x2143, 0x2151, 0x2141
};

/*
 * Table used to convert JIS X 0201 KATAKANA to JIS X 0208.
 *
 * Indexed by (byte - 0xa0) for the bytes 0xa0..0xdf.  Each entry is a
 * 16-bit JIS X 0208 code (first byte in the upper 8 bits, second byte in
 * the lower 8 bits).  The entry for 0xa0 is 0x0000; the caller only
 * accepts bytes 0xa1..0xdf before indexing.
 */
static const unsigned int jisx0201_table[] = {
    /* 0xa0 -- 0xaf */
    0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
    0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
    /* 0xb0 -- 0xbf */
    0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
    0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
    /* 0xc0 -- 0xcf */
    0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
    0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
    /* 0xd0 -- 0xdf */
    0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
    0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
};

/*
 * Convert `input_word' (EUC-JP) to JIS X 0208 and put it into `word'.
 *
 * Leading and trailing blanks (space, tab, and the two-byte sequence
 * 0xa1 0xa1) are dropped.  Every remaining character is stored as two
 * bytes:
 *   - ASCII 0x20..0x7e (tab counts as space) is mapped through
 *     jisx0208_table;
 *   - a two-byte character whose bytes are both in 0xa1..0xfe has the
 *     high bit of each byte cleared;
 *   - SS2 (0x8e) followed by a byte in 0xa1..0xdf is mapped through
 *     jisx0201_table.
 *
 * On success, EB_SUCCESS is returned and `*word_code' is set to
 * EB_WORD_KANA, EB_WORD_ALPHABET or EB_WORD_OTHER.
 * On failure, EB_ERR_TOO_LONG_WORD, EB_ERR_BAD_WORD or EB_ERR_EMPTY_WORD
 * is returned, `word' is set to an empty string and `*word_code' to
 * EB_WORD_INVALID.
 */
static EB_Error_Code
eb_convert_euc_jp(EB_Book *book, const char *input_word, char *word,
    EB_Word_Code *word_code)
{
    EB_Error_Code error_code;
    unsigned char *wp = (unsigned char *) word;
    const unsigned char *inp = (const unsigned char *) input_word;
    const unsigned char *tail;
    unsigned char c1 = 0, c2 = 0;
    int kana_count = 0;
    int alphabet_count = 0;
    int kanji_count = 0;
    int word_length = 0;

    LOG(("in: eb_convert_euc_jp(book=%d, input_word=%s)", (int)book->code,
        eb_quoted_string(input_word)));

    /*
     * Find the tail of `input_word': one past the last byte kept.
     *
     * The scan starts at the terminating NUL and only looks at bytes
     * before `tail', so no pointer in front of the string is formed for
     * an empty input.  As before, the very first byte of the input is
     * never stripped here; the leading-blank loop below deals with it.
     */
    tail = inp + strlen(input_word);
    for (;;) {
        if (1 < tail - inp && (*(tail - 1) == ' ' || *(tail - 1) == '\t'))
            tail--;
        else if (2 < tail - inp
            && *(tail - 1) == 0xa1 && *(tail - 2) == 0xa1)
            tail -= 2;
        else
            break;
    }

    /*
     * Ignore spaces and tabs in the beginning of `input_word'.
     */
    for (;;) {
        if (*inp == ' ' || *inp == '\t')
            inp++;
        else if (*inp == 0xa1 && *(inp + 1) == 0xa1)
            inp += 2;
        else
            break;
    }

    while (inp < tail) {
        /*
         * Check for the length of the word.
         * If exceeds, return with an error code.
         */
        if (EB_MAX_WORD_LENGTH < word_length + 2) {
            error_code = EB_ERR_TOO_LONG_WORD;
            goto failed;
        }

        /*
         * Tabs are translated to spaces.
         */
        c1 = *inp++;
        if (c1 == '\t')
            c1 = ' ';

        if (0x20 <= c1 && c1 <= 0x7e) {
            /*
             * `c1' is a character in ASCII.
             */
            unsigned int c = jisx0208_table[c1 - 0x20];
            c1 = c >> 8;
            c2 = c & 0xff;
        } else if (0xa1 <= c1 && c1 <= 0xfe) {
            /*
             * `c1' is a character in JIS X 0208, or local character.
             */
            c2 = *inp++;

            if (0xa1 <= c2 && c2 <= 0xfe) {
                c1 &= 0x7f;
                c2 &= 0x7f;
            } else if (c2 < 0x20 || 0x7e < c2) {
                error_code = EB_ERR_BAD_WORD;
                goto failed;
            }
        } else if (c1 == 0x8e) {
            /*
             * `c1' is SS2.  The JIS X 0201 KATAKANA byte follows it and
             * must be fetched from the input before it is validated.
             */
            unsigned int c;

            if (tail <= inp) {
                error_code = EB_ERR_BAD_WORD;
                goto failed;
            }
            c2 = *inp++;
            if (c2 < 0xa1 || 0xdf < c2) {
                error_code = EB_ERR_BAD_WORD;
                goto failed;
            }

            /*
             * Table entries are 16-bit codes; some of them are in row
             * 0x21 rather than 0x25, so both bytes come from the table.
             */
            c = jisx0201_table[c2 - 0xa0];
            c1 = c >> 8;
            c2 = c & 0xff;
        } else {
            error_code = EB_ERR_BAD_WORD;
            goto failed;
        }

        *wp++ = c1;
        *wp++ = c2;

        /*
         * Classify the character by its first byte:
         *   0x23       : counted as alphabet
         *   0x24, 0x25 : counted as KANA
         *   0x21       : not counted
         *   otherwise  : counted as KANJI
         */
        if (c1 == 0x23)
            alphabet_count++;
        else if (c1 == 0x24 || c1 == 0x25)
            kana_count++;
        else if (c1 != 0x21)
            kanji_count++;

        word_length += 2;
    }
    *wp = '\0';

    if (word_length == 0) {
        error_code = EB_ERR_EMPTY_WORD;
        goto failed;
    }
    if (alphabet_count == 0 && kana_count != 0 && kanji_count == 0)
        *word_code = EB_WORD_KANA;
    else if (alphabet_count != 0 && kana_count == 0 && kanji_count == 0)
        *word_code = EB_WORD_ALPHABET;
    else
        *word_code = EB_WORD_OTHER;

    LOG(("out: eb_convert_euc_jp(word=%s, word_code=%d) = %s",
        eb_quoted_string(word), (int)*word_code, eb_error_string(EB_SUCCESS)));

    return EB_SUCCESS;

    /*
     * An error occurs...
     */
  failed:
    *word = '\0';
    *word_code = EB_WORD_INVALID;
    LOG(("out: eb_convert_euc_jp() = %s", eb_error_string(error_code)));
    return error_code;
}


/*
 * Convert KATAKANA to HIRAGANA in `word'.
 *
 * `word' is processed in place as a sequence of two-byte characters.
 * A trailing unpaired byte, if any, is cut off.
 */
static void
eb_convert_katakana_jis(char *word)
{
    unsigned char *p;

    LOG(("in: eb_convert_katakana_jis(word=%s)", eb_quoted_string(word)));

    for (p = (unsigned char *) word; p[0] != '\0' && p[1] != '\0'; p += 2) {
        /*
         * A KATAKANA has first byte 0x25; the corresponding HIRAGANA has
         * the same second byte with first byte 0x24.
         */
        if (p[0] == 0x25 && 0x21 <= p[1] && p[1] <= 0x76) {
            p[0] = 0x24;
        }
    }
    *p = '\0';

    LOG(("out: eb_convert_katakana_jis()"));
}


/*
 * Convert HIRAGANA to KATAKANA in `word'.
 *
 * `word' is processed in place as a sequence of two-byte characters.
 * A trailing unpaired byte, if any, is cut off.
 */
static void
eb_convert_hiragana_jis(char *word)
{
    unsigned char *p;

    LOG(("in: eb_convert_hiragana_jis(word=%s)", eb_quoted_string(word)));

    for (p = (unsigned char *) word; p[0] != '\0' && p[1] != '\0'; p += 2) {
        /*
         * A HIRAGANA has first byte 0x24; the corresponding KATAKANA has
         * the same second byte with first byte 0x25.
         */
        if (p[0] == 0x24 && 0x21 <= p[1] && p[1] <= 0x76) {
            p[0] = 0x25;
        }
    }
    *p = '\0';

    LOG(("out: eb_convert_hiragana_jis()"));
}


/*
 * Convert lower case to upper case in `word' (single-byte characters).
 *
 * Bytes in 'a'..'z', 0xe0..0xf6 and 0xf8..0xfe are lowered by 0x20;
 * every other byte is left as it is.
 */
static void
eb_convert_lower_latin(char *word)
{
    unsigned char *p;

    LOG(("in: eb_convert_lower_latin(word=%s)", eb_quoted_string(word)));

    for (p = (unsigned char *) word; *p != '\0'; p++) {
        const unsigned char ch = *p;
        const int is_lower = ('a' <= ch && ch <= 'z')
            || (0xe0 <= ch && ch <= 0xf6)
            || (0xf8 <= ch && ch <= 0xfe);

        if (is_lower) {
            *p = ch - 0x20;
        }
    }
    *p = '\0';

    LOG(("out: eb_convert_lower_latin()"));
}


/*
 * Convert lower case to upper case in `word' (two-byte characters).
 *
 * A character with first byte 0x23 and second byte 0x61..0x7a has its
 * second byte lowered by 0x20.  A trailing unpaired byte, if any, is
 * cut off.
 */
static void
eb_convert_lower_jis(char *word)
{
    unsigned char *p;

    LOG(("in: eb_convert_lower_jis(word=%s)", eb_quoted_string(word)));

    for (p = (unsigned char *) word; p[0] != '\0' && p[1] != '\0'; p += 2) {
        const unsigned char second = p[1];

        if (p[0] == 0x23 && 0x61 <= second && second <= 0x7a) {
            p[1] = second - 0x20;
        }
    }
    *p = '\0';

    LOG(("out: eb_convert_lower_jis()"));
}


/*
 * Delete some marks in `word'.
 *
 * The two-byte characters 0x2126, 0x213e, 0x2147 and 0x215d are removed;
 * all other characters are kept in order.  `word' is compacted in place
 * and a trailing unpaired byte, if any, is dropped.
 */
static void
eb_delete_marks_jis(char *word)
{
    const unsigned char *src = (const unsigned char *) word;
    unsigned char *dst = (unsigned char *) word;

    LOG(("in: eb_delete_marks_jis(word=%s)", eb_quoted_string(word)));

    for (; src[0] != '\0' && src[1] != '\0'; src += 2) {
        const unsigned char first = src[0];
        const unsigned char second = src[1];
        const int is_mark = (first == 0x21)
            && (second == 0x26 || second == 0x3e
                || second == 0x47 || second == 0x5d);

        if (is_mark) {
            continue;
        }
        dst[0] = first;
        dst[1] = second;
        dst += 2;
    }
    *dst = '\0';

    LOG(("out: eb_delete_marks_jis()"));
}


/*
 * The table is used to convert long vowel marks.
 *
 * Indexed by (second byte of the preceding KANA - 0x21), for second bytes
 * 0x21..0x76.  Each entry is the second byte of the vowel that replaces
 * the long vowel mark.  The entry for N (0x73) maps to itself.
 */
static const char long_vowel_table[] = {
    0x22, /* a(21) -> A(22) */      0x22, /* A(22) -> A(22) */
    0x24, /* i(23) -> I(24) */      0x24, /* I(24) -> I(24) */
    0x26, /* u(25) -> U(26) */      0x26, /* U(26) -> U(26) */
    0x28, /* e(27) -> E(28) */      0x28, /* E(28) -> E(28) */
    0x2a, /* o(29) -> O(2a) */      0x2a, /* O(2a) -> O(2a) */
    0x22, /* KA(2b) -> A(22) */     0x22, /* GA(2c) -> A(22) */
    0x24, /* KI(2d) -> I(24) */     0x24, /* GI(2e) -> I(24) */
    0x26, /* KU(2f) -> U(26) */     0x26, /* GU(30) -> U(26) */
    0x28, /* KE(31) -> E(28) */     0x28, /* GE(32) -> E(28) */
    0x2a, /* KO(33) -> O(2a) */     0x2a, /* GO(34) -> O(2a) */
    0x22, /* SA(35) -> A(22) */     0x22, /* ZA(36) -> A(22) */
    0x24, /* SI(37) -> I(24) */     0x24, /* ZI(38) -> I(24) */
    0x26, /* SU(39) -> U(26) */     0x26, /* ZU(3a) -> U(26) */
    0x28, /* SE(3b) -> E(28) */     0x28, /* ZE(3c) -> E(28) */
    0x2a, /* SO(3d) -> O(2a) */     0x2a, /* ZO(3e) -> O(2a) */
    0x22, /* TA(3f) -> A(22) */     0x22, /* DA(40) -> A(22) */
    0x24, /* TI(41) -> I(24) */     0x24, /* DI(42) -> I(24) */
    0x26, /* tu(43) -> U(26) */     0x26, /* TU(44) -> U(26) */
    0x26, /* DU(45) -> U(26) */     0x28, /* TE(46) -> E(28) */
    0x28, /* DE(47) -> E(28) */     0x2a, /* TO(48) -> O(2a) */
    0x2a, /* DO(49) -> O(2a) */     0x22, /* NA(4a) -> A(22) */
    0x24, /* NI(4b) -> I(24) */     0x26, /* NU(4c) -> U(26) */
    0x28, /* NE(4d) -> E(28) */     0x2a, /* NO(4e) -> O(2a) */
    0x22, /* HA(4f) -> A(22) */     0x22, /* BA(50) -> A(22) */
    0x22, /* PA(51) -> A(22) */     0x24, /* HI(52) -> I(24) */
    0x24, /* BI(53) -> I(24) */     0x24, /* PI(54) -> I(24) */
    0x26, /* HU(55) -> U(26) */     0x26, /* BU(56) -> U(26) */
    0x26, /* PU(57) -> U(26) */     0x28, /* HE(58) -> E(28) */
    0x28, /* BE(59) -> E(28) */     0x28, /* PE(5a) -> E(28) */
    0x2a, /* HO(5b) -> O(2a) */     0x2a, /* BO(5c) -> O(2a) */
    0x2a, /* PO(5d) -> O(2a) */     0x22, /* MA(5e) -> A(22) */
    0x24, /* MI(5f) -> I(24) */     0x26, /* MU(60) -> U(26) */
    0x28, /* ME(61) -> E(28) */     0x2a, /* MO(62) -> O(2a) */
    0x22, /* ya(63) -> A(22) */     0x22, /* YA(64) -> A(22) */
    0x26, /* yu(65) -> U(26) */     0x26, /* YU(66) -> U(26) */
    0x2a, /* yo(67) -> O(2a) */     0x2a, /* YO(68) -> O(2a) */
    0x22, /* RA(69) -> A(22) */     0x24, /* RI(6a) -> I(24) */
    0x26, /* RU(6b) -> U(26) */     0x28, /* RE(6c) -> E(28) */
    0x2a, /* RO(6d) -> O(2a) */     0x22, /* wa(6e) -> A(22) */
    0x22, /* WA(6f) -> A(22) */     0x24, /* WI(70) -> I(24) */
    0x28, /* WE(71) -> E(28) */     0x2a, /* WO(72) -> O(2a) */
    0x73, /* N (73) -> N(73) */     0x26, /* VU(74) -> U(26) */
    0x22, /* ka(75) -> A(22) */     0x28  /* ke(76) -> E(28) */
};


/*
 * Convert long vowel marks in `word' to the vowel of the previous KANA.
 *
 * A long vowel mark (0x213c) that directly follows a HIRAGANA or KATAKANA
 * is replaced by a character in the same row as that KANA whose second
 * byte is taken from long_vowel_table.  A mark that follows anything
 * else, including another long vowel mark, is left unchanged.
 * A trailing unpaired byte, if any, is cut off.
 */
static void
eb_convert_long_vowels_jis(char *word)
{
    unsigned char *p = (unsigned char *) word;
    unsigned char prev_first = '\0';
    unsigned char prev_second = '\0';

    LOG(("in: eb_convert_long_vowels_jis(word=%s)", eb_quoted_string(word)));

    while (p[0] != '\0' && p[1] != '\0') {
        /* Remember the character as it was before any replacement. */
        const unsigned char first = p[0];
        const unsigned char second = p[1];
        const int prev_is_kana = (prev_first == 0x24 || prev_first == 0x25)
            && 0x21 <= prev_second && prev_second <= 0x76;

        if (first == 0x21 && second == 0x3c && prev_is_kana) {
            p[0] = prev_first;
            p[1] = long_vowel_table[prev_second - 0x21];
        }

        prev_first = first;
        prev_second = second;
        p += 2;
    }
    *p = '\0';

    LOG(("out: eb_convert_long_vowels_jis()"));
}


/*
 * Delete long vowel marks in `word'.
 *
 * Every two-byte character 0x213c is removed; all other characters are
 * kept in order.  `word' is compacted in place and a trailing unpaired
 * byte, if any, is dropped.
 */
static void
eb_delete_long_vowels_jis(char *word)
{
    const unsigned char *src = (const unsigned char *) word;
    unsigned char *dst = (unsigned char *) word;

    LOG(("in: eb_delete_long_vowels_jis(word=%s)", eb_quoted_string(word)));

    for (; src[0] != '\0' && src[1] != '\0'; src += 2) {
        const unsigned char first = src[0];
        const unsigned char second = src[1];

        /* Skip the long vowel mark itself. */
        if (first == 0x21 && second == 0x3c) {
            continue;
        }
        dst[0] = first;
        dst[1] = second;
        dst += 2;
    }
    *dst = '\0';

    LOG(("out: eb_delete_long_vowels_jis()"));
}


/*
 * Convert the double consonant mark `tu' to `TU'.
 *
 * In a HIRAGANA or KATAKANA character (first byte 0x24 or 0x25), the
 * second byte 0x43 is replaced by 0x44.  A trailing unpaired byte, if
 * any, is cut off.
 */
static void
eb_convert_double_consonants_jis(char *word)
{
    unsigned char *p;

    LOG(("in: eb_convert_double_consonants_jis(word=%s)",
        eb_quoted_string(word)));

    for (p = (unsigned char *) word; p[0] != '\0' && p[1] != '\0'; p += 2) {
        const int is_kana = (p[0] == 0x24 || p[0] == 0x25);

        if (is_kana && p[1] == 0x43) {
            p[1] = 0x44;
        }
    }
    *p = '\0';

    LOG(("out: eb_convert_double_consonants_jis()"));
}


/*
 * Convert the contracted sound marks to the corresponding
 * non-contracted sound marks.
 * (`ya', `yu', `yo', `wa', `ka', `ke' -> `YA', `YU', `YO', `WA', `KA', `KE')
 *
 * Only HIRAGANA and KATAKANA characters (first byte 0x24 or 0x25) are
 * touched, and only their second byte changes.  A trailing unpaired
 * byte, if any, is cut off.
 */
static void
eb_convert_contracted_sounds_jis(char *word)
{
    unsigned char *wp = (unsigned char *) word;
    unsigned char c1, c2;

    LOG(("in: eb_convert_contracted_sounds_jis(word=%s)",
        eb_quoted_string(word)));

    while (*wp != '\0' && *(wp + 1) != '\0') {
        c1 = *wp;
        c2 = *(wp + 1);

        if (c1 == 0x24 || c1 == 0x25) {
            /*
             * This is HIRAGANA or KATAKANA.
             * If this is a contracted sound mark, convert to the
             * corresponding uncontracted sound mark.
             */
            switch (c2) {
            case 0x63:      /* ya -> YA */
            case 0x65:      /* yu -> YU */
            case 0x67:      /* yo -> YO */
            case 0x6e:      /* wa -> WA */
                *(wp + 1) = c2 + 1;
                break;
            case 0x75:      /* ka -> KA */
                *(wp + 1) = 0x2b;
                break;
            case 0x76:      /* ke -> KE */
                *(wp + 1) = 0x31;
                break;
            default:
                break;
            }
        }
        wp += 2;
    }
    *wp = '\0';

    /* Exit trace; it used to be mislabelled as "in:". */
    LOG(("out: eb_convert_contracted_sounds_jis()"));
}


/*
 * Convert the small vowels to the normal vowels.
 * (`a', `i', `u', `e', `o' -> `A', `I', `U', `E', `O')
 *
 * Only HIRAGANA and KATAKANA characters (first byte 0x24 or 0x25) are
 * touched: the second bytes 0x21, 0x23, 0x25, 0x27 and 0x29 are each
 * raised by one.  A trailing unpaired byte, if any, is cut off.
 */
static void
eb_convert_small_vowels_jis(char *word)
{
    unsigned char *p;

    LOG(("in: eb_convert_small_vowels_jis(word=%s)", eb_quoted_string(word)));

    for (p = (unsigned char *) word; p[0] != '\0' && p[1] != '\0'; p += 2) {
        if (p[0] != 0x24 && p[0] != 0x25) {
            continue;
        }

        switch (p[1]) {
        case 0x21:
        case 0x23:
        case 0x25:
        case 0x27:
        case 0x29:
            /* The normal vowel directly follows its small form. */
            p[1] = p[1] + 1;
            break;
        default:
            break;
        }
    }
    *p = '\0';

    LOG(("out: eb_convert_small_vowels_jis()"));
}


/*
 * The table is used to convert voiced consonant marks.
 *
 * It is indexed by (second byte - 0x21) of a HIRAGANA/KATAKANA character
 * and covers the second bytes 0x21..0x76.  Voiced consonant marks
 * (`GA', `ZA', `DA', `BA', ...) map to the unvoiced mark and `VU' maps
 * to `U'.  Every other entry, including the p-sound marks and the
 * contracted `ya', `yu', `yo', maps to itself.
 */
static const char voiced_consonant_table[] = {
    0x21, /* a(21) -> a(21) */      0x22, /* A(22) -> A(22) */
    0x23, /* i(23) -> i(23) */      0x24, /* I(24) -> I(24) */
    0x25, /* u(25) -> u(25) */      0x26, /* U(26) -> U(26) */
    0x27, /* e(27) -> e(27) */      0x28, /* E(28) -> E(28) */
    0x29, /* o(29) -> o(29) */      0x2a, /* O(2a) -> O(2a) */
    0x2b, /* KA(2b) -> KA(2b) */    0x2b, /* GA(2c) -> KA(2b) */
    0x2d, /* KI(2d) -> KI(2d) */    0x2d, /* GI(2e) -> KI(2d) */
    0x2f, /* KU(2f) -> KU(2f) */    0x2f, /* GU(30) -> KU(2f) */
    0x31, /* KE(31) -> KE(31) */    0x31, /* GE(32) -> KE(31) */
    0x33, /* KO(33) -> KO(33) */    0x33, /* GO(34) -> KO(33) */
    0x35, /* SA(35) -> SA(35) */    0x35, /* ZA(36) -> SA(35) */
    0x37, /* SI(37) -> SI(37) */    0x37, /* ZI(38) -> SI(37) */
    0x39, /* SU(39) -> SU(39) */    0x39, /* ZU(3a) -> SU(39) */
    0x3b, /* SE(3b) -> SE(3b) */    0x3b, /* ZE(3c) -> SE(3b) */
    0x3d, /* SO(3d) -> SO(3d) */    0x3d, /* ZO(3e) -> SO(3d) */
    0x3f, /* TA(3f) -> TA(3f) */    0x3f, /* DA(40) -> TA(3f) */
    0x41, /* TI(41) -> TI(41) */    0x41, /* DI(42) -> TI(41) */
    0x43, /* tu(43) -> tu(43) */    0x44, /* TU(44) -> TU(44) */
    0x44, /* DU(45) -> TU(44) */    0x46, /* TE(46) -> TE(46) */
    0x46, /* DE(47) -> TE(46) */    0x48, /* TO(48) -> TO(48) */
    0x48, /* DO(49) -> TO(48) */    0x4a, /* NA(4a) -> NA(4a) */
    0x4b, /* NI(4b) -> NI(4b) */    0x4c, /* NU(4c) -> NU(4c) */
    0x4d, /* NE(4d) -> NE(4d) */    0x4e, /* NO(4e) -> NO(4e) */
    0x4f, /* HA(4f) -> HA(4f) */    0x4f, /* BA(50) -> HA(4f) */
    0x51, /* PA(51) -> PA(51) */    0x52, /* HI(52) -> HI(52) */
    0x52, /* BI(53) -> HI(52) */    0x54, /* PI(54) -> PI(54) */
    0x55, /* HU(55) -> HU(55) */    0x55, /* BU(56) -> HU(55) */
    0x57, /* PU(57) -> PU(57) */    0x58, /* HE(58) -> HE(58) */
    0x58, /* BE(59) -> HE(58) */    0x5a, /* PE(5a) -> PE(5a) */
    0x5b, /* HO(5b) -> HO(5b) */    0x5b, /* BO(5c) -> HO(5b) */
    0x5d, /* PO(5d) -> PO(5d) */    0x5e, /* MA(5e) -> MA(5e) */
    0x5f, /* MI(5f) -> MI(5f) */    0x60, /* MU(60) -> MU(60) */
    0x61, /* ME(61) -> ME(61) */    0x62, /* MO(62) -> MO(62) */
    0x63, /* ya(63) -> ya(63) */    0x64, /* YA(64) -> YA(64) */
    0x65, /* yu(65) -> yu(65) */    0x66, /* YU(66) -> YU(66) */
    0x67, /* yo(67) -> yo(67) */    0x68, /* YO(68) -> YO(68) */
    0x69, /* RA(69) -> RA(69) */    0x6a, /* RI(6a) -> RI(6a) */
    0x6b, /* RU(6b) -> RU(6b) */    0x6c, /* RE(6c) -> RE(6c) */
    0x6d, /* RO(6d) -> RO(6d) */    0x6e, /* wa(6e) -> wa(6e) */
    0x6f, /* WA(6f) -> WA(6f) */    0x70, /* WI(70) -> WI(70) */
    0x71, /* WE(71) -> WE(71) */    0x72, /* WO(72) -> WO(72) */
    0x73, /* N(73) -> N(73) */      0x26, /* VU(74) -> U(26) */
    0x75, /* ka(75) -> ka(75) */    0x76  /* ke(76) -> ke(76) */
};

/*
 * Convert the voiced consonant marks to the corresponding unvoiced
 * consonant marks (e.g. `GA' to `KA').
 *
 * `word' is modified in place and is walked two bytes at a time.  For
 * each HIRAGANA/KATAKANA character whose second byte lies in 0x21..0x76,
 * the second byte is replaced by its entry in `voiced_consonant_table'.
 * The walk ends at the first pair containing a NUL byte, and the word is
 * terminated at that position.
 */
static void
eb_convert_voiced_consonants_jis(char *word)
{
    unsigned char *pair = (unsigned char *) word;

    LOG(("in: eb_convert_voiced_consonants_jis(word=%s)",
        eb_quoted_string(word)));

    for (; pair[0] != '\0' && pair[1] != '\0'; pair += 2) {
        unsigned char row = pair[0];
        unsigned char cell = pair[1];
        int is_kana = (row == 0x24 || row == 0x25);

        /* The table only covers the second bytes 0x21..0x76. */
        if (is_kana && cell >= 0x21 && cell <= 0x76)
            pair[1] = voiced_consonant_table[cell - 0x21];
    }
    *pair = '\0';

    LOG(("out: eb_convert_voiced_consonants_jis()"));
}


/*
 * Replace the p-sound marks by the matching h-sound marks.
 * (`PA', `PI', `PU', `PE', `PO' -> `HA', `HI', `HU', `HE', `HO')
 *
 * `word' is modified in place and is walked two bytes at a time.  The
 * walk ends at the first pair containing a NUL byte, and the word is
 * terminated at that position.
 */
static void
eb_convert_p_sounds_jis(char *word)
{
    unsigned char *p = (unsigned char *) word;
    unsigned char lead, trail;

    LOG(("in: eb_convert_p_sounds_jis(word=%s)", eb_quoted_string(word)));

    for (; p[0] != '\0' && p[1] != '\0'; p += 2) {
        lead = p[0];
        trail = p[1];

        /* Only HIRAGANA (0x24) and KATAKANA (0x25) are of interest. */
        if (lead != 0x24 && lead != 0x25)
            continue;

        switch (trail) {
        case 0x51:
        case 0x54:
        case 0x57:
        case 0x5a:
        case 0x5d:
            /* The h-sound mark sits two codes below the p-sound mark. */
            p[1] = trail - 2;
            break;
        default:
            break;
        }
    }
    *p = '\0';

    LOG(("out: eb_convert_p_sounds_jis()"));
}


/*
 * Delete spaces in `word'.
 *
 * `word' is compacted in place: every byte other than the ISO 8859-1
 * space character (0x20) is kept in its original order, and the result
 * is NUL terminated.
 */
static void
eb_delete_spaces_latin(char *word)
{
    unsigned char *in_wp = (unsigned char *) word;
    unsigned char *out_wp = (unsigned char *) word;

    /* The trace names match the function name (was `eb_delete_space_latin'). */
    LOG(("in: eb_delete_spaces_latin(word=%s)", eb_quoted_string(word)));

    while (*in_wp != '\0') {
        if (*in_wp != ' ') {
            /*
             * This is not a space character of ISO 8859 1.
             */
            *out_wp = *in_wp;
            out_wp++;
        }
        in_wp++;
    }
    *out_wp = '\0';

    LOG(("out: eb_delete_spaces_latin()"));
}


/*
 * Delete spaces in `word'.
 *
 * `word' is compacted in place.  It is scanned two bytes at a time and
 * every character other than the JIS X 0208 space (0x21 0x21) is kept
 * in its original order.  Scanning stops at the first pair containing a
 * NUL byte, so a dangling odd byte at the end of the word is dropped.
 */
static void
eb_delete_spaces_jis(char *word)
{
    unsigned char *in_wp = (unsigned char *) word;
    unsigned char *out_wp = (unsigned char *) word;
    unsigned char c1, c2;

    /* The trace names match the function name (was `eb_delete_space_jis'). */
    LOG(("in: eb_delete_spaces_jis(word=%s)", eb_quoted_string(word)));

    while (*in_wp != '\0' && *(in_wp + 1) != '\0') {
        c1 = *in_wp;
        c2 = *(in_wp + 1);

        if (c1 != 0x21 || c2 != 0x21) {
            /*
             * This is not a space character of JIS X 0208.
             */
            *out_wp = c1;
            *(out_wp + 1) = c2;
            out_wp += 2;
        }
        in_wp += 2;
    }
    *out_wp = '\0';

    LOG(("out: eb_delete_spaces_jis()"));
}


/*
 * Reverse a word for ENDWORD SEARCH.
 *
 * `word' is a word to reverse.  It must be an alphabetic word.
 * The reversed word is also put into `word'.
 *
 * The bytes of `word' are reversed in place.  An empty or one-byte
 * word is left untouched.
 */
static void
eb_reverse_word_latin(char *word)
{
    char *p1, *p2;
    size_t word_length;
    char c;

    LOG(("in: eb_reverse_word_latin(word=%s)", eb_quoted_string(word)));

    /* Keep the length as size_t; strlen() does not return int. */
    word_length = strlen(word);

    /*
     * Guard instead of returning early, so that `word + word_length - 1'
     * is never formed for an empty word and the "out:" trace below is
     * emitted on every path.
     */
    if (word_length > 1) {
        for (p1 = word, p2 = word + word_length - 1; p1 < p2; p1++, p2--) {
            c = *p1;
            *p1 = *p2;
            *p2 = c;
        }
    }

    LOG(("out: eb_reverse_word_latin()"));
}


/*
 * Reverse a word for ENDWORD SEARCH.
 *
 * `word' is a word to reverse.  It must be a KANA word.
 * The reversed word is also put into `word'.
 *
 * The word is reversed in place in units of two bytes, so the byte
 * order inside each character is preserved.  If the word has an odd
 * number of bytes, the last byte is cut off first.
 */
static void
eb_reverse_word_jis(char *word)
{
    char *p1, *p2;
    size_t word_length;
    char c;

    LOG(("in: eb_reverse_word_jis(word=%s)", eb_quoted_string(word)));

    /* Keep the length as size_t; strlen() does not return int. */
    word_length = strlen(word);
    if (word_length % 2 == 1) {
        /* Drop the dangling half of a character. */
        word_length--;
        *(word + word_length) = '\0';
    }

    /*
     * Fewer than two characters means there is nothing to swap.  The
     * guard also keeps `word + word_length - 2' from pointing before
     * the start of `word' when the word is empty, which is undefined
     * behavior.
     */
    if (word_length >= 4) {
        for (p1 = word, p2 = word + word_length - 2; p1 < p2;
             p1 += 2, p2 -= 2) {
            c = *p1;
            *p1 = *p2;
            *p2 = c;
            c = *(p1 + 1);
            *(p1 + 1) = *(p2 + 1);
            *(p2 + 1) = c;
        }
    }

    LOG(("out: eb_reverse_word_jis()"));
}