在 C 语言的字符数组中使用 bsearch 搜索

如何解决：在 C 语言的字符数组中使用 bsearch 搜索

我想使用 bsearch 在已排序的文本中查找一个字符串（在代码中名为 word）。为什么 bsearch 似乎不起作用？

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * Comparison callback in the shape qsort() and bsearch() expect.
 * a and b are cast to `const char **` and dereferenced once, so each one is
 * treated as a pointer to an array slot that itself holds a `char *`; the two
 * strings reached that way are ordered with strcmp().
 */
int cmp(const void *a,const void *b) {
    const char *ia = *(const char **)a;
    const char *ib = *(const char **)b;
    return strcmp(ia,ib);
}

/*
 * Program from the question: reads a line of text and a search word from
 * stdin, splits the text on " .\n", sorts and prints the tokens, then looks
 * the word up with bsearch().
 *
 * Only the bsearch() call is repaired here: it had lost its element-count
 * argument and could not compile. The defects the question asks about are
 * intentionally left as posted and are flagged with BUG comments.
 */
int main() {
    char *text = malloc(sizeof(char *) * 1000);
    char *word = malloc(sizeof(char *) * 30);
    char **all_words = malloc(sizeof(char *) * 1);
    int count_word = 0;

    fgets(text,1000,stdin);
    // BUG: fgets() keeps the trailing '\n' in word, while the tokens below
    // are split on '\n' and therefore never contain one.
    fgets(word,30,stdin);
    char *tok = strtok(text," .\n");

    while (tok != NULL) {
        all_words[count_word] = malloc(sizeof(char) * (strlen(tok) + 1));
        // BUG: this assignment discards the buffer allocated just above and
        // stores tok itself, which points into text.
        all_words[count_word] = tok;
        all_words[count_word][strlen(tok)] = '\0';

        count_word++;
        // Grow the pointer array by one slot so the next token has room.
        all_words = realloc(all_words,(count_word + 1) * sizeof(char *));

        tok = strtok(NULL," .\n");
    }

    qsort(all_words,count_word,sizeof(char *),cmp);
    for (int i = 0; i < count_word; i++) {
        puts(all_words[i]);
    }

    // The key is &word because cmp() dereferences both arguments as `char **`.
    void *pointer = bsearch(&word,all_words,count_word,sizeof(char*),cmp);

    if (pointer != NULL) {
        puts("exists");
    } else {
        puts("doesn't exist");
    }

    for (int i = 0; i < count_word; i++) {
        // BUG: each entry points into text rather than at its own allocation.
        free(all_words[i]);    //---- error here
    }

    free(all_words);
    free(word);
    free(text);
}

为什么在使用 strtok 之后释放 char * 数组时会出现此错误？是 strtok 的用法有问题吗？（已由 @wildplasser 和 @JonathanLeffler 解决，谢谢）

还有一个问题是关于我的 cmp 函数。我知道它可以正常工作，但我不记得为什么会这样......这段代码有什么区别？（@JonathanLeffler 也有解释）

/*
 * Variant the question asks about: a and b are cast straight to
 * `const char *` with no dereference, so puts() and strcmp() read the bytes
 * located at the addresses a and b themselves.
 */
int cmp(const void *a,const void *b) {
    const char *ia = (const char *)a;
    const char *ib = (const char *)b;
    puts(ia);
    return strcmp(ia,ib);
}

解决方法

如评论中所述，主要问题是您没有使用 strcpy() 复制标记，而是直接用指针赋值覆盖了刚分配的内存。正如 wildplasser 在评论中所指出的，使用 POSIX 的 strdup() 函数更简单（它还不是标准 C 的一部分，尽管这可能会在 C2x 中发生变化）。

次要问题是 word 包含换行符，而标记从不包含换行符。你需要删除它。

此代码进行了这些更改并添加了一个函数来转储字符串数组。

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * Comparator for qsort()/bsearch() over an array of `char *`.
 * Each argument is the address of one array slot; the slot is read to get
 * the string, and the two strings are ordered with strcmp().
 */
static int cmp(const void *a,const void *b)
{
    return strcmp(*(const char **)a,*(const char **)b);
}

static void dump_strings(const char *tag,size_t num_words,char **words);

/*
 * Corrected version of the question's program: reads a line of text and a
 * search word from stdin, copies each token with strdup(), dumps the array
 * before and after sorting, and reports whether the word is present.
 *
 * Allocation results are not checked and the pointer array still grows by
 * one slot per token; both are kept as in the posted listing, which the text
 * following it goes on to discuss.
 */
int main(void)
{
    char *text = malloc(sizeof(char *) * 1000);
    char *word = malloc(sizeof(char *) * 30);
    char **all_words = malloc(sizeof(char *) * 1);
    int count_word = 0;

    fgets(text,1000,stdin);
    fgets(word,30,stdin);
    // Drop the newline fgets() leaves in word; tokens never contain one.
    word[strcspn(word,"\n")] = '\0';
    char *tok = strtok(text," .\n");

    while (tok != NULL)
    {
        // strdup() gives every entry its own allocation, so free() below is valid.
        all_words[count_word++] = strdup(tok);
        all_words = realloc(all_words,(count_word + 1) * sizeof(char *));
        tok = strtok(NULL," .\n");
    }

    dump_strings("Before",count_word,all_words);

    qsort(all_words,count_word,sizeof(char *),cmp);
    dump_strings("After",count_word,all_words);

    // word is a `char *` variable here, so &word is a `char **` as cmp() expects.
    void *pointer = bsearch(&word,all_words,count_word,sizeof(char *),cmp);

    if (pointer != NULL)
       printf("Word [%s] exists\n",word);
    else
       printf("Word [%s] does not exists\n",word);

    for (int i = 0; i < count_word; i++)
        free(all_words[i]);

    free(all_words);
    free(word);
    free(text);
    return 0;
}

/*
 * Print a tagged listing of a string array to stdout.
 *
 * tag        label printed on the header line
 * num_words  number of entries in words
 * words      array of num_words NUL-terminated strings
 *
 * Output is a header "tag (num_words):" followed by one "index: [string]"
 * line per entry.
 */
static void dump_strings(const char *tag,size_t num_words,char **words)
{
    printf("%s (%zu):\n",tag,num_words);
    for (size_t i = 0; i < num_words; i++)
        printf("%zu: [%s]\n",i,words[i]);
}

这是在一个文件 cpystr89.c 中编译成一个名为 cpystr89 的可执行文件。我使用以下 shell 脚本来运行命令：

./cpystr89 <<'EOF'
this is the text of the long string with many words in it and some repetition and so on.  of course, it is in mono-case for simplicity.  messieurs o'rourke and o'sullivan cause trouble; so does a semicolon
the
EOF

它的输出是：

Before (37):
0: [this]
1: [is]
2: [the]
3: [text]
4: [of]
5: [the]
6: [long]
7: [string]
8: [with]
9: [many]
10: [words]
11: [in]
12: [it]
13: [and]
14: [some]
15: [repetition]
16: [and]
17: [so]
18: [on]
19: [of]
20: [course,]
21: [it]
22: [is]
23: [in]
24: [mono-case]
25: [for]
26: [simplicity]
27: [messieurs]
28: [o'rourke]
29: [and]
30: [o'sullivan]
31: [cause]
32: [trouble;]
33: [so]
34: [does]
35: [a]
36: [semicolon]
After (37):
0: [a]
1: [and]
2: [and]
3: [and]
4: [cause]
5: [course,]
6: [does]
7: [for]
8: [in]
9: [in]
10: [is]
11: [is]
12: [it]
13: [it]
14: [long]
15: [many]
16: [messieurs]
17: [mono-case]
18: [o'rourke]
19: [o'sullivan]
20: [of]
21: [of]
22: [on]
23: [repetition]
24: [semicolon]
25: [simplicity]
26: [so]
27: [so]
28: [some]
29: [string]
30: [text]
31: [the]
32: [the]
33: [this]
34: [trouble;]
35: [with]
36: [words]
Word [the] exists

输入循环中的 realloc() 代码并不理想。通常，您应该怀疑任何每次迭代只将内存分配增加一个单位的循环。随着时间的推移，这可能导致二次方级别的开销，因为每次（或大多数）重新分配时都要把旧分配中的内容复制到新分配中。通常最好采用一次分配许多额外指针、再逐个分发的策略。在这种情况下，您可以简单地分配 strlen(text) / 2 + 1 个指针；最多每隔一个字符就是一个单词（例如，如果您键入 a b c d 作为文本）。通常，您会使用两个计数器：num_alloc 和 num_used。num_alloc 值记录已分配了多少个指针；num_used 值记录正在使用的指针数量。您可以在每次需要扩容时将 num_alloc 加倍。如果您担心输入完成后分配过多，可以使用缩小尺寸的 realloc() 来释放多余空间。

您的代码不会检查每个内存分配是否成功。应该检查！

我还会注意到我不会将 malloc() 用于 text 和 word。您可以在堆栈上分配几千字节的内存。当您开始在堆栈上分配数百 KB 时会出现问题。 Windows 上的默认堆栈限制通常为 1 MiB；在大多数类 Unix 系统上，它是 8 MiB。无论哪种情况，一旦您考虑分配数百 KB，就该考虑使用 malloc() 等进行动态内存分配。不分配这些会简化错误清理工作。

此代码实现了这些更改。请注意对 bsearch() 的调用的变化。

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

static int cmp(const void *a,const void *b);
static void dump_strings(const char *tag,size_t num_words,char **words);
static void free_strings(size_t num_words,char **words);

/*
 * Reads a line of text and a search word from stdin, stores a copy of every
 * token in a geometrically grown pointer array, dumps the array before and
 * after sorting, and reports whether the word is present.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if input cannot be read or
 * an allocation fails.
 */
int main(void)
{
    char text[1000];
    char word[30];
    char **all_words = NULL;
    int count_words = 0;
    int alloc_words = 0;

    // Without both lines the buffers would be used uninitialized.
    if (fgets(text,sizeof(text),stdin) == NULL || fgets(word,sizeof(word),stdin) == NULL)
    {
        fprintf(stderr,"Failed to read the text and the search word\n");
        return EXIT_FAILURE;
    }
    // Drop the newline fgets() leaves in word; tokens never contain one.
    word[strcspn(word,"\n")] = '\0';

    for (char *tok = strtok(text," .\n"); tok != NULL; tok = strtok(NULL," .\n"))
    {
        if (count_words >= alloc_words)
        {
            size_t new_count = alloc_words * 2 + 2;     // + 2 to get away from zero
            // Keep the old pointer until realloc() succeeds so it can still be freed.
            char **new_words = realloc(all_words,new_count * sizeof(*all_words));
            if (new_words == NULL)
            {
                fprintf(stderr,"Failed to reallocate %zu bytes memory\n",new_count * sizeof(*all_words));
                free_strings(count_words,all_words);
                exit(EXIT_FAILURE);
            }
            all_words = new_words;
            alloc_words = new_count;
        }
        if ((all_words[count_words++] = strdup(tok)) == NULL)
        {
            fprintf(stderr,"Failed to allocate %zu bytes memory\n",strlen(tok) + 1);
            free_strings(count_words,all_words);
            exit(EXIT_FAILURE);
        }
    }

    dump_strings("Before",count_words,all_words);

    qsort(all_words,count_words,sizeof(char *),cmp);
    dump_strings("After",count_words,all_words);

    // word is an array, so &word has type `char (*)[30]`, not the `char **`
    // that cmp() dereferences; a separate pointer variable supplies the right key.
    char *p_word = word;        // Irksome but necessary; using &word in call to bsearch() crashes
    void *pointer = bsearch(&p_word,all_words,count_words,sizeof(char *),cmp);

    if (pointer != NULL)
        printf("Word [%s] exists\n",word);
    else
        printf("Word [%s] does not exists\n",word);

    free_strings(count_words,all_words);

    return 0;
}

/*
 * Comparator for qsort()/bsearch() over an array of `char *`: each argument
 * is the address of one slot, and the strings they hold are ordered with strcmp().
 */
static int cmp(const void *a,const void *b)
{
    const char *ia = *(const char **)a;
    const char *ib = *(const char **)b;
    return strcmp(ia,ib);
}

/*
 * Print "tag (num_words):" followed by one "index: [string]" line for each
 * of the num_words entries in words.
 */
static void dump_strings(const char *tag,size_t num_words,char **words)
{
    printf("%s (%zu):\n",tag,num_words);
    for (size_t i = 0; i < num_words; i++)
        printf("%zu: [%s]\n",i,words[i]);
}

/*
 * Release an array of heap-allocated strings.
 *
 * num_words  number of entries in words to free
 * words      the array; each of its first num_words entries is passed to
 *            free(), then the array itself is freed
 */
static void free_strings(size_t num_words,char **words)
{
    size_t remaining = num_words;
    char **slot = words;

    while (remaining-- > 0)
        free(*slot++);

    free(words);
}

输出——惊喜，惊喜——和之前一样。

我还将错误报告和退出代码封装到函数中。我将使用的代码在 GitHub 上的 SOQ（堆栈溢出问题）存储库中作为 src/libsoq 子目录中的文件 stderr.c 和 stderr.h 提供。

        // Excerpt: the strdup() failure check rewritten around err_syserr()
        // (declared in stderr.h; presumably reports the error and exits — confirm
        // against that header). The strings stored so far are released first.
        if ((all_words[count_words++] = strdup(tok)) == NULL)
        {
            free_strings(count_words,all_words);
            err_syserr("Failed to allocate %zu bytes memory\n",strlen(tok) + 1);
        }

由于需要调用 free_strings()，这里的效果不像有时那样明显，但在简单的情况下，它能将 4 行代码减少到 1 行，这让我更愿意添加必要的错误检查。

bsearch 使用的比较函数考虑一个元素数组（必须将其大小作为参数传递），然后通过引用进行比较（这意味着，您会收到指向字符的双指针，而不是单指针）这是因为 bsearch 例程是通用的，可以传递给它的元素本身可以是结构体或大数组。唯一的要求是它们的大小必须相等。

这就是为什么您在调用比较器时取消引用指针，然后使用取消引用的指针调用 strcmp 的原因。 Strcmp 只获取指向 char 的指针，而不是指向 char 的指针的引用，这就是 strcmp 本身不足以用于传递给 bsearch 的原因。同样的问题也适用于 qsort 和朋友。

最后一个提示：如果不需要，就不要使用强制类型转换（cast）。这适用于您在 ia 和 ib 的声明中所做的转换。您最好把比较函数写成：

/*
 * Cast-free comparator for qsort()/bsearch() over an array of `char *`.
 * The void pointers convert implicitly to pointers to the array slots, and
 * the strings those slots hold are ordered with strcmp().
 */
int cmp(const void *a,const void *b) {
    const char * const *lhs = a;
    const char * const *rhs = b;
    return strcmp(*lhs,*rhs);
}

如果你犯了一个错误，你会收到编译器的投诉（就像我在准备这个例子时所做的那样；））。