英文:
c++ multiple regex extractions to an array from dbc entry
问题
以下是要翻译的代码部分:
// Collects every run of decimal digits found in `blah` as a separate string.
auto blah = std::string{"5001 | 5002 | 5003"};
// The regex has to be a named object that outlives the iterators: the
// std::sregex_token_iterator overload taking a temporary regex is deleted.
const auto digits = std::regex{R"(\d+)"};
auto values = std::vector<std::string>{
    std::sregex_token_iterator{blah.begin(), blah.end(), digits},
    std::sregex_token_iterator{}};
请注意,这段代码似乎包含了 HTML 实体编码(例如 &quot; 和 &lt;)。如果您想要解析字符串中的值,可以使用正则表达式来匹配数字和带引号的描述,然后按照需要进行进一步处理。如果需要进一步的帮助,请提出具体的问题。
英文:
Hello I would like to extract the parameters from the following string:
VAL_ 234 State1 123 "Description 1" 0 "Description 2 with \n new line" 90903489 "Big value and special characters &$§())!" ;
The desired matches are
- 234
- State1
- and then an array with unsigned integer and string combination
- 123 "Description 1"
- 0 "Description 2 with \n new line"
- 90903489 "Big value and special characters &$§())!"
The array shall be split in a second step if it is not possible to do it directly.
With the following regex I always get only the last match of the array, 90903489 "Big value and special characters &$§())!":
^VAL_ ([0-9]+) ([A-Za-z_][A-Za-z_0-9]*) ([0-9]*\\s\"[^\"]*\"\\s)+
Is there a possibility to extract the values?
I have already found
// Collects every run of decimal digits found in `blah` as a separate string.
auto blah = std::string{"5001 | 5002 | 5003"};
// The regex has to be a named object that outlives the iterators: the
// std::sregex_token_iterator overload taking a temporary regex is deleted.
const auto digits = std::regex{R"(\d+)"};
auto values = std::vector<std::string>{
    std::sregex_token_iterator{blah.begin(), blah.end(), digits},
    std::sregex_token_iterator{}};
in this post, but it just returns the complete string. Is it possible to iterate over the submatches?
答案1
得分: 0
以下是代码部分的翻译:
Sample code:
// The statement has to stay on one line: `.` does not match a line break, so a
// newline inside a quoted description would leave that value/description pair unmatched.
const std::string input{ R"(VAL_ 234 State1 123 "Description 1" 0 "Description 2 with \n new line" 90903489 "Big value and special characters &$§())!")" };
// First alternative: the header, capturing the id (group 1) and the name (group 2).
// Second alternative: one `<number> "<text>"` pair (group 3).
const std::regex regex{ R"((?:^VAL_\s(\d+)\s(\w+)|\s(\d+\s".+?")))" };
const std::sregex_iterator end{};
for (auto it = std::sregex_iterator{ std::cbegin(input), std::cend(input), regex };
     it != end; ++it) {
    // Every element produced by the iterator is a successful match, so only the
    // capture groups that took part in it need to be checked.
    const std::smatch& match = *it;
    if (match[1].matched) {
        std::cout << "Val match: " << match[1].str() << '\n';
    }
    if (match[2].matched) {
        std::cout << "State match: " << match[2].str() << '\n';
    }
    if (match[3].matched) {
        std::cout << "Etc match: " << match[3].str() << '\n';
    }
}
如果您有其他需要翻译的内容,请随时提出。
英文:
Not sure if you have any specific requirements on how the matches need to be separated, but you can match either of the patterns with the following regular expression:
(?:^VAL_\s(\d+)\s(\w+)|\s(\d+\s".+?"))
Sample code:
// The statement has to stay on one line: `.` does not match a line break, so a
// newline inside a quoted description would leave that value/description pair unmatched.
const std::string input{ R"(VAL_ 234 State1 123 "Description 1" 0 "Description 2 with \n new line" 90903489 "Big value and special characters &$§())!")" };
// First alternative: the header, capturing the id (group 1) and the name (group 2).
// Second alternative: one `<number> "<text>"` pair (group 3).
const std::regex regex{ R"((?:^VAL_\s(\d+)\s(\w+)|\s(\d+\s".+?")))" };
const std::sregex_iterator end{};
for (auto it = std::sregex_iterator{ std::cbegin(input), std::cend(input), regex };
     it != end; ++it) {
    // Every element produced by the iterator is a successful match, so only the
    // capture groups that took part in it need to be checked.
    const std::smatch& match = *it;
    if (match[1].matched) {
        std::cout << "Val match: " << match[1].str() << '\n';
    }
    if (match[2].matched) {
        std::cout << "State match: " << match[2].str() << '\n';
    }
    if (match[3].matched) {
        std::cout << "Etc match: " << match[3].str() << '\n';
    }
}
答案2
得分: 0
根据@rustyx的链接,我创建了自己的解析器。
// States of the hand-written `VAL_` line parser in main(), listed in the
// order in which the parser first enters them. Numbering starts at 0.
enum VALToken {
    Identifier,   // expecting the literal prefix "VAL_ "
    CANId,        // expecting a run of decimal digits
    SignalName,   // expecting a letter or '_' followed by letters, digits or '_'
    Value,        // expecting the decimal digits of the next value
    Description   // expecting a double-quoted text
};
// One value/description pair as collected by the parser in main().
struct ValueDescription {
    // Digits of the value, stored as text exactly as they were read.
    std::string value;

    // Characters found between the two double quotes (quotes not included).
    std::string description;
};
// Parses one hard-coded DBC `VAL_` statement with a hand-written state machine.
//
// Accepted shape, with exactly one space between tokens:
//   VAL_ <decimal id> <signal name> { <decimal value> "<description>" } ;
// The id and the signal name are stored in `can_id` / `signal_name`, and every
// value/description pair is appended to `vds`. The parsed data is not printed.
//
// Returns 0 once the terminating ';' has been reached, 1 if the text is malformed.
int main(int argc, char *argv[]) {
    (void)argc;  // the command line is not used
    (void)argv;

    const std::string s = R"(VAL_ 234 State1 123 "Description 1" 0 "Description 2 with \n new line" 90903489 "Big value and special characters &$§())!" ;)";

    // Explicit ranges keep the character classes independent of the locale.
    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
    const auto is_name_start = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
    };

    auto state = Identifier;
    const char* a = s.data();  // read cursor; data() is NUL-terminated, so *a == 0 marks the end
    std::string can_id;
    std::string signal_name;
    std::vector<ValueDescription> vds;
    ValueDescription vd;

    bool finished = false;
    while (!finished) {
        switch (state) {
        case Identifier: {
            // The statement has to start with the keyword followed by one space.
            if (s.compare(0, 5, "VAL_ ") != 0)
                return 1;
            a += 5;
            state = CANId;
            break;
        }
        case CANId: {
            while (is_digit(*a))
                can_id += *a++;
            if (can_id.empty() || *a != ' ')
                return 1;
            ++a;  // skip whitespace
            state = SignalName;
            break;
        }
        case SignalName: {
            // The first character must not be a digit; later ones may be.
            if (!is_name_start(*a))
                return 1;
            while (is_name_start(*a) || is_digit(*a))
                signal_name += *a++;
            if (*a != ' ')
                return 1;
            ++a;  // skip whitespace
            state = Value;
            break;
        }
        case Value: {
            // A ';' where the next value would start ends the statement.
            if (*a == ';') {
                finished = true;
                break;
            }
            std::string value_str;
            while (is_digit(*a))
                value_str += *a++;
            if (value_str.empty() || *a != ' ')
                return 1;
            ++a;  // skip whitespace
            vd.value = value_str;
            state = Description;
            break;
        }
        case Description: {
            if (*a != '"')
                return 1;
            ++a;  // opening quote
            std::string desc;
            while (*a != '"' && *a != 0)
                desc += *a++;
            if (*a == 0)
                return 1;  // closing quote is missing
            ++a;  // closing quote
            if (*a != ' ')
                return 1;
            ++a;  // skip whitespace
            vd.description = desc;
            vds.push_back(vd);
            state = Value;
            break;
        }
        }
    }
    return 0;
}
英文:
Based on @rustyx's link, I created my own parser:
// States of the hand-written `VAL_` line parser in main(), listed in the
// order in which the parser first enters them. Numbering starts at 0.
enum VALToken {
    Identifier,   // expecting the literal prefix "VAL_ "
    CANId,        // expecting a run of decimal digits
    SignalName,   // expecting a letter or '_' followed by letters, digits or '_'
    Value,        // expecting the decimal digits of the next value
    Description   // expecting a double-quoted text
};
// One value/description pair as collected by the parser in main().
struct ValueDescription {
    // Digits of the value, stored as text exactly as they were read.
    std::string value;

    // Characters found between the two double quotes (quotes not included).
    std::string description;
};
// Parses one hard-coded DBC `VAL_` statement with a hand-written state machine.
//
// Accepted shape, with exactly one space between tokens:
//   VAL_ <decimal id> <signal name> { <decimal value> "<description>" } ;
// The id and the signal name are stored in `can_id` / `signal_name`, and every
// value/description pair is appended to `vds`. The parsed data is not printed.
//
// Returns 0 once the terminating ';' has been reached, 1 if the text is malformed.
int main(int argc, char *argv[])
{
    (void)argc;  // the command line is not used
    (void)argv;

    const std::string s = R"(VAL_ 234 State1 123 "Description 1" 0 "Description 2 with \n new line" 90903489 "Big value and special characters &$§())!" ;)";

    // Explicit ranges keep the character classes independent of the locale.
    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
    const auto is_name_start = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
    };

    auto state = Identifier;
    const char* a = s.data();  // read cursor; data() is NUL-terminated, so *a == 0 marks the end
    std::string can_id;
    std::string signal_name;
    std::vector<ValueDescription> vds;
    ValueDescription vd;

    bool finished = false;
    while (!finished) {
        switch (state) {
        case Identifier: {
            // The statement has to start with the keyword followed by one space.
            if (s.compare(0, 5, "VAL_ ") != 0)
                return 1;
            a += 5;
            state = CANId;
            break;
        }
        case CANId: {
            while (is_digit(*a))
                can_id += *a++;
            if (can_id.empty() || *a != ' ')
                return 1;
            ++a;  // skip whitespace
            state = SignalName;
            break;
        }
        case SignalName: {
            // The first character must not be a digit; later ones may be.
            if (!is_name_start(*a))
                return 1;
            while (is_name_start(*a) || is_digit(*a))
                signal_name += *a++;
            if (*a != ' ')
                return 1;
            ++a;  // skip whitespace
            state = Value;
            break;
        }
        case Value: {
            // A ';' where the next value would start ends the statement.
            if (*a == ';') {
                finished = true;
                break;
            }
            std::string value_str;
            while (is_digit(*a))
                value_str += *a++;
            if (value_str.empty() || *a != ' ')
                return 1;
            ++a;  // skip whitespace
            vd.value = value_str;
            state = Description;
            break;
        }
        case Description: {
            if (*a != '"')
                return 1;
            ++a;  // opening quote
            std::string desc;
            while (*a != '"' && *a != 0)
                desc += *a++;
            if (*a == 0)
                return 1;  // closing quote is missing
            ++a;  // closing quote
            if (*a != ' ')
                return 1;
            ++a;  // skip whitespace
            vd.description = desc;
            vds.push_back(vd);
            state = Value;
            break;
        }
        }
    }
    return 0;
}
答案3
得分: 0
我会先执行一次 regex_match,然后使用 sregex_iterator 进行循环。
#include <fmt/core.h>
#include <regex>
#include <string>
// Splits a DBC `VAL_` entry in two steps: one regex_match separates the id,
// the signal name and the remaining text, then an sregex_iterator walks the
// remaining text and prints every `<number> "<description>"` pair.
int main() {
    const std::string text{ "VAL_ 234 State1"
                            " 123 \"Description 1\""
                            " 0 \"Description 2 with \\n new line\""
                            " 90903489 \"Big value and special characters &$§())!\""
    };
    // Group 1: numeric id, group 2: the complete signal name, group 3: the rest.
    const std::regex pattern{ R"(VAL_ (\d+) (\w+)(.*))" };
    std::smatch matches{};
    if (std::regex_match(text, matches, pattern)) {
        fmt::print("{}\n{}\n", matches[1].str(), matches[2].str());
        // The pattern contains a `)` directly followed by `"`, which would end a
        // default-delimited raw string early, hence the custom `re` delimiter.
        const std::regex array_pattern{ R"re(\s+(\d+)\s+"([^"]+)")re" };
        const auto array_text{ matches[3].str() };
        for (std::sregex_iterator it{ array_text.begin(), array_text.end(), array_pattern };
             it != std::sregex_iterator{};
             ++it) {
            const std::smatch& array_matches = *it;
            fmt::print("\t'{}', '{}'\n", array_matches[1].str(), array_matches[2].str());
        }
    }
}
// Outputs:
//
// 234
// State1
//     '123', 'Description 1'
//     '0', 'Description 2 with \n new line'
//     '90903489', 'Big value and special characters &$§())!'
英文:
I would do a regex_match followed by a loop using an sregex_iterator.
#include <fmt/core.h>
#include <regex>
#include <string>
// Splits a DBC `VAL_` entry in two steps: one regex_match separates the id,
// the signal name and the remaining text, then an sregex_iterator walks the
// remaining text and prints every `<number> "<description>"` pair.
int main() {
    const std::string text{ "VAL_ 234 State1"
                            " 123 \"Description 1\""
                            " 0 \"Description 2 with \\n new line\""
                            " 90903489 \"Big value and special characters &$§())!\""
    };
    // Group 1: numeric id, group 2: the complete signal name, group 3: the rest.
    const std::regex pattern{ R"(VAL_ (\d+) (\w+)(.*))" };
    std::smatch matches{};
    if (std::regex_match(text, matches, pattern)) {
        fmt::print("{}\n{}\n", matches[1].str(), matches[2].str());
        // The backslash before each quote keeps `)` and `"` apart, so the raw
        // string is not terminated in the middle of the pattern.
        const std::regex array_pattern{ R"(\s+(\d+)\s+\"([^"]+)\")" };
        const auto array_text{ matches[3].str() };
        for (std::sregex_iterator it{ array_text.begin(), array_text.end(), array_pattern };
             it != std::sregex_iterator{};
             ++it) {
            const std::smatch& array_matches = *it;
            fmt::print("\t'{}', '{}'\n", array_matches[1].str(), array_matches[2].str());
        }
    }
}
// Outputs:
//
// 234
// State1
//     '123', 'Description 1'
//     '0', 'Description 2 with \n new line'
//     '90903489', 'Big value and special characters &$§())!'
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论