如何在字符串中找到句子中的单词数？

如何解决如何在字符串中找到句子中的单词数？

当句子的间距不均匀时，我如何找到句子中的单词数，例如：

"how     are you?"

下面是我的代码，但我没有得到预期的输出。

#include <iostream>
#include <string>

using namespace std;

// Question's original attempt: tallies vowels, consonants and words of a
// fixed sentence in one pass. It is kept as posted to illustrate the problem;
// the notes below explain why the word count comes out wrong.
int main() {
    string s = "how     are you?";

    int vowels = 0;
    int consonants = 0;
    // Starts at 1 on the assumption that N separators delimit N + 1 words.
    int words = 1;

    for(int i= 0; i < s.length(); i++) {
        if(s[i] == 'A' || s[i] == 'E' || s[i] == 'I' || s[i] == 'O' || s[i] == 'U' || s[i] == 'a' || s[i] == 'e' || s[i] == 'i' || s[i] == 'o' || s[i] == 'u') {
            vowels+=1;
            // NOTE(review): the next condition has two '(' but only one ')',
            // so this listing does not compile as written. '     ' is also a
            // multi-character literal (an int with an implementation-defined
            // value), which cannot match a run of spaces when compared with
            // the single char s[i].
        } else if((s[i] == '     ') {
            words +=1;
            // Every individual space bumps the counter, so a run of
            // consecutive spaces is counted as several word boundaries.
        } else if ((s[i] == ' ')) {
            words += 1;
        } else {
            // Anything that is neither a vowel nor a space lands here,
            // including punctuation such as '?'.
            consonants += 1;
        }
    }

    cout << "number of vowels "<< vowels<< endl;
    cout << "number of words "<< words << endl;
    cout << "number of consonants "<< consonants << endl;

    return 0;
}

解决方法

以下是您的代码的更新版本：

#include <iostream>

#include <sstream>

#include <vector>

using namespace std;

// Counts vowels, consonants and words in a sample sentence.
//
// The sentence is split with formatted stream extraction (operator>>), which
// skips any amount of leading whitespace, so unevenly spaced words are still
// counted once each.
//
// Every character of a token that is not one of "AEIOUaeiou" is counted as a
// consonant; this includes punctuation such as '?'.
int
main() {
  const std::string s = "how     are you?";

  int vowels = 0;
  int consonants = 0;
  int words = 0;

  std::string buf;
  std::stringstream ss(s);

  // Each successful extraction yields exactly one whitespace-delimited token.
  while (ss >> buf) {
    words++;

    for (const char c : buf) {
      if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U' ||
          c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u') {
        vowels++;
      } else {
        consonants++;
      }
    }
  }

  std::cout << "number of vowels " << vowels << std::endl;
  std::cout << "number of words " << words << std::endl;
  std::cout << "number of consonants " << consonants << std::endl;

  return 0;
}

（PS：你可能想修改代码，让“?”之类的字符不被计入——按上面的写法，紧跟在单词后面的“?”会被算作辅音，而单独出现的“?”会被算作一个单词。）

您还可以使用 Boost 库检查其他解决方案：link

你可以这样做：

// Counts vowels, consonants and words in a sample sentence with a single
// pass over the characters.
//
// A word is counted at its first character: a non-space character that is
// either the first character of the string or directly preceded by a space.
// Word detection is kept separate from the vowel/consonant classification so
// that words beginning with a vowel are counted too.
//
// Spaces are skipped entirely. Any other character that is not one of
// "AEIOUaeiou" is counted as a consonant; this includes punctuation such
// as '?'.
int main() {
    const std::string s = "how     are you?";

    int vowels = 0;
    int consonants = 0;
    int words = 0;

    for (std::string::size_type i = 0; i < s.length(); i++) {
        const char c = s[i];

        if (c == ' ') {
            continue;  // separators contribute to no counter
        }

        // Start of a new word: no previous character, or previous is a space.
        if (i == 0 || s[i - 1] == ' ') {
            words += 1;
        }

        if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U' ||
            c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u') {
            vowels += 1;
        } else {
            consonants += 1;
        }
    }

    std::cout << "number of vowels " << vowels << std::endl;
    std::cout << "number of words " << words << std::endl;
    std::cout << "number of consonants " << consonants << std::endl;

    return 0;
}

您可以简单地使用一个布尔标志来跟踪当前是否处于某个单词之中。每当遇到空格或其他标点符号（即任何非字母、非数字的字符）时，检查该标志：如果为真，则将单词数加一，并把标志重置为假。循环结束后还要再检查一次标志，以便统计末尾的单词。修改后的代码如下：

#include <iostream>
#include <string>

using namespace std;

// Tallies vowels, consonants and words of a sample sentence in one pass.
//
// A flag remembers whether the scan is currently inside a word. A word is
// counted when a character outside [A-Za-z0-9] is reached while the flag is
// set, and once more after the loop if the sentence ends inside a word.
// Letters that are not vowels, as well as digits, count as consonants.
int main() {
    const std::string sentence = "how     are you?";

    int vowelCount = 0;
    int consonantCount = 0;
    int wordCount = 0;
    bool insideWord = false;

    for (const char ch : sentence) {
        switch (ch) {
            case 'A': case 'E': case 'I': case 'O': case 'U':
            case 'a': case 'e': case 'i': case 'o': case 'u':
                ++vowelCount;
                insideWord = true;
                continue;  // vowel handled, move on to the next character
            default:
                break;
        }

        const bool isLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
        const bool isDigit = (ch >= '0' && ch <= '9');

        if (isLetter || isDigit) {
            ++consonantCount;
            insideWord = true;
        } else {
            // Separator: close the word that was in progress, if any.
            if (insideWord) {
                ++wordCount;
            }
            insideWord = false;
        }
    }

    // The sentence may end without a trailing separator.
    if (insideWord) {
        ++wordCount;
    }

    std::cout << "number of vowels " << vowelCount << std::endl;
    std::cout << "number of words " << wordCount << std::endl;
    std::cout << "number of consonants " << consonantCount << std::endl;

    return 0;
}

C++11 的标准库中提供了正则表达式库。因此，除非您想“手动实现”，否则直接使用 <regex> 头文件即可。

#include <iostream>
#include <cinttypes>
#include <string>
#include <regex>

// Prints every match of the regular expression \w+ found in a sample
// sentence, one per line, followed by the total number of matches.
int main(int argc,const char* argv[]) {
  const std::regex wordPattern("\\w+");
  const std::string text("how     are you?");

  size_t matchCount = 0;
  // A default-constructed regex iterator is the end-of-sequence marker.
  const std::sregex_iterator noMoreMatches;
  std::sregex_iterator match(text.begin(), text.end(), wordPattern);
  while (match != noMoreMatches) {
    std::cout << match->str() << std::endl;
    ++matchCount;
    ++match;
  }

  std::cout << matchCount << std::endl;
  return 0;
}

如果您确实想手动编写代码，从有限状态机的角度考虑问题可能是最简单的。

有 2 种状态：{IN_WORD,IN_SPACES}。迭代中的当前字符决定了当前状态。
当处于 IN_WORD 状态时，您将字符收集到一个字符串中。
当处于 IN_SPACES 状态时，您只需跳过该字符。
在从 IN_WORD -> IN_SPACES 的转换中，完成一个单词并增加您的单词计数器。
如果迭代结束时（即已越过最后一个字符）您仍处于 IN_WORD 状态，则还需要再将单词计数器加一。

#include <iostream>
#include <cinttypes>
#include <string>
#include <regex>
#include <cctype>

/// @brief Counts the words in @p input with a two-state scan.
///
/// A character is "inside a word" unless std::isspace or std::ispunct
/// classifies it as whitespace or punctuation. A word is counted on every
/// transition from inside a word to outside one, and once more if the input
/// ends while still inside a word.
///
/// @param input Text to scan; may be empty.
/// @return Number of words found (0 for empty or separator-only input).
size_t manual_word_counter( const std::string& input) {
  const auto is_word_char = [](char c) {
    // The <cctype> classifiers require a value representable as unsigned
    // char (or EOF); passing a negative plain char is undefined behaviour.
    const auto uc = static_cast<unsigned char>(c);
    return !std::isspace(uc) && !std::ispunct(uc);
  };

  size_t counter = 0;
  bool in_word = false;  // state before the first character: not in a word
  for (const char c : input) {
    const bool now_in_word = is_word_char(c);
    if (in_word && !now_in_word)
      ++counter;  // a word has just ended
    in_word = now_in_word;
  }
  if (in_word)
    ++counter;  // input ended in the middle of a word

  return counter;
}

int main(int argc,words);
       iter != std::sregex_iterator();
       ++iter) {
    std::cout << (*iter).str() << std::endl;
    nwords++;
  }
  std::cout << nwords << std::endl;
  std::cout
    << "manual solution yields: "
    << manual_word_counter(input) << " words." << std::endl;
  return 0;
}