英文:
Python regex find match words by AND condition
问题
def count_conditions(input_str, listbox):
    """Report electrical attributes found in each outermost ``{AND`` block.

    The text is scanned line by line while tracking brace depth, so any
    ``{AND`` or ``{OR`` block nested inside an outer ``{AND`` block is counted
    as part of that outer block instead of being reported on its own.

    Args:
        input_str: Text containing brace-delimited condition blocks. Leading
            and trailing whitespace is stripped and line endings are
            normalised to a single newline before lines are numbered.
        listbox: Any object exposing ``insert(index, text)``; each message is
            added with ``listbox.insert("end", message)``.

    Returns:
        None. One message per outermost AND block is inserted into
        ``listbox``. A block that is never closed produces no message.
    """
    import re  # local import: the snippet uses ``re`` without importing it

    electric_attributes = ['electric', 'batter', 'power']
    input_str = input_str.strip().replace('\r\n', '\n').replace('\r', '\n')

    depth = 0          # number of currently open braces
    block_start = 0    # 1-based line on which the current outer block opened
    block_lines = []   # lines collected for the current outer block

    for line_num, line in enumerate(input_str.split('\n'), start=1):
        if depth == 0:
            # Outside any block: only a line opening an AND block matters.
            if '{AND' not in line:
                continue
            block_start = line_num
            block_lines = []

        block_lines.append(line)
        depth += line.count('{') - line.count('}')
        if depth > 0:
            continue
        depth = 0  # a surplus of closing braces must not go negative

        # The outer block just closed: collect matching words once each,
        # keeping the spelling used in the input.
        attrs_found = []
        for word in re.findall(r'\b\w+\b', '\n'.join(block_lines)):
            if word in attrs_found:
                continue
            lowered = word.lower()
            if any(attr in lowered for attr in electric_attributes):
                attrs_found.append(word)

        if attrs_found:
            message = "{} electrical attribute(s) found on lines {}-{} of AND condition: {}".format(
                len(attrs_found),
                block_start,
                line_num,
                ', '.join(attrs_found)
            )
        else:
            message = "No electrical attribute(s) found on lines {}-{} of AND condition".format(
                block_start,
                line_num,
            )
        listbox.insert("end", message)
With this code modification, the code will count electrical attributes in AND conditions only if they are not inside nested AND conditions. This should provide the desired output as mentioned in your example.
英文:
def count_conditions(input_str, listbox):
    """Report electrical attributes for every regex match of ``{AND ... }``.

    Known limitation (this is the behaviour the question is about): the
    non-greedy pattern ends each match at the first ``}``, so nested blocks
    are split into several matches rather than treated as one outer block.
    Line numbers are accumulated from the length of each match, so they are
    only accurate when the matches follow each other without gaps.

    Args:
        input_str: Text containing ``{AND ... }`` blocks.
        listbox: Any object exposing ``insert(index, text)``; each message is
            added with ``listbox.insert("end", message)``.

    Returns:
        None. One message per regex match is inserted into ``listbox``.
    """
    import re  # local import: the snippet uses ``re`` without importing it

    electric_attributes = ['electric', 'batter', "power"]
    input_str = input_str.strip().replace('\r\n', '\n').replace('\r', '\n')
    pattern = re.compile(r'(\{AND.*?\})', re.DOTALL)
    matches = re.findall(pattern, input_str)
    line_num = 1
    for match in matches:
        # Collect each matching word once, in order of first appearance.
        attrs_found = []
        words = re.findall(r'\b\w+\b', match)
        for word in words:
            for attr in electric_attributes:
                if attr in word.lower() and word not in attrs_found:
                    attrs_found.append(word)
        if attrs_found:
            message = "{} electrical attribute(s) found on lines {}-{} of AND condition: {}".format(
                len(attrs_found),
                line_num,
                line_num + len(match.splitlines()) - 1,
                ', '.join(attrs_found)
            )
        else:
            message = "No electrical attribute(s) found on lines {}-{} of AND condition".format(
                line_num,
                line_num + len(match.splitlines()) - 1,
            )
        listbox.insert("end", message)
        # Advance past this match; text between matches is not counted.
        line_num += match.count('\n') + 1
I want the code to be able to determine the external "and" syntaxes, and not those that are already inside AND syntaxes.
For example lines 1-14 should be treated as 1 AND syntax, not 3.
{AND
condition_1:batterie
condition_2:samsung
{OR
{AND
condition_1:TV
condition_2:LCD
}
{AND
condition_1:remote
condition_2:TV
}
}
}
{AND
condition_1:batterie
condition_2:samsung
{OR
condition_3:TV
condition_4:remote
}
}
The Output should be:
1 electrical attribute found on lines: 1-14 of AND condition: batterie
1 electric attribute found on lines 15-22 of AND condition: batterie.
Please help! What am I doing wrong?
Or maybe other approach?
答案1
得分: 2
def count_conditions(input_str):
    """Print which electrical attributes occur in each outermost AND block.

    Brace depth is tracked line by line, so blocks nested inside an outer
    ``{AND`` block are treated as part of that outer block.

    Args:
        input_str: Text containing brace-delimited condition blocks. Leading
            and trailing whitespace is stripped before lines are numbered,
            so line 1 is the first non-blank line.

    Returns:
        None. One line per outermost AND block is printed. A block that is
        never closed prints nothing.
    """
    electric_attributes = ['electric', 'batterie', "power"]
    # The right-hand side of this assignment was missing from the snippet;
    # stripping first makes the reported line numbers start at the first block.
    input_lines = input_str.strip().splitlines()
    open_brackets = 0
    current_block = []
    block_start = 0
    for i, line in enumerate(input_lines):
        if open_brackets == 0:
            # Outside any block: ignore everything except an AND opener.
            if '{AND' not in line:
                continue
            current_block = []
            block_start = i + 1
        current_block.append(line)
        open_brackets += line.count('{') - line.count('}')
        if open_brackets > 0:
            continue
        open_brackets = 0  # a surplus of closing braces must not go negative

        # We've reached the end of the current external 'AND' block.
        # Joining with newlines keeps an attribute from matching across
        # two adjacent lines.
        block_text = '\n'.join(current_block)
        attrs_found = [attr for attr in electric_attributes if attr in block_text]
        if attrs_found:
            l = len(attrs_found)
            print(f'{l} electrical attribute{"s" if l > 1 else ""} found on lines {block_start}-{i + 1}'
                  f' of AND condition: {", ".join(attrs_found)}')
        else:
            print(f'No electrical attribute found on lines {block_start}-{i + 1}'
                  f' of AND condition')
EXAMPLE
# Sample input: two top-level AND blocks; the first one contains a nested
# OR block that in turn holds two inner AND blocks.
test = '''
{AND
condition_1:batterie
condition_2:samsung
{OR
{AND
condition_1:TV
condition_2:LCD
}
{AND
condition_1:remote
condition_2:TV
}
}
}
{AND
condition_1:batterie
condition_2:samsung
{OR
condition_3:TV
condition_4:remote
}
}
'''
# Run the counter on the sample; the comments below presumably show the
# output printed for it.
count_conditions(test)
# 1 electrical attribute found on lines 1-14 of AND condition: batterie
# 1 electrical attribute found on lines 15-22 of AND condition: batterie
英文:
I wrote some code based on my comment (I made some changes to yours too: printing the lines instead of inserting them into the unknown `listbox` object, and updating `electric_attributes` with the full name `batterie`).
def count_conditions(input_str):
    """Print which electrical attributes occur in each outermost AND block.

    Brace depth is tracked line by line, so blocks nested inside an outer
    ``{AND`` block are treated as part of that outer block.

    Args:
        input_str: Text containing brace-delimited condition blocks. Leading
            and trailing whitespace is stripped before lines are numbered,
            so line 1 is the first non-blank line.

    Returns:
        None. One line per outermost AND block is printed. A block that is
        never closed prints nothing.
    """
    electric_attributes = ['electric', 'batterie', "power"]
    # The right-hand side of this assignment was missing from the snippet;
    # stripping first makes the reported line numbers start at the first block.
    input_lines = input_str.strip().splitlines()
    open_brackets = 0
    current_block = []
    block_start = 0
    for i, line in enumerate(input_lines):
        if open_brackets == 0:
            # Outside any block: ignore everything except an AND opener.
            if '{AND' not in line:
                continue
            current_block = []
            block_start = i + 1
        current_block.append(line)
        open_brackets += line.count('{') - line.count('}')
        if open_brackets > 0:
            continue
        open_brackets = 0  # a surplus of closing braces must not go negative

        # We've reached the end of the current external 'AND' block.
        # Joining with newlines keeps an attribute from matching across
        # two adjacent lines.
        block_text = '\n'.join(current_block)
        attrs_found = [attr for attr in electric_attributes if attr in block_text]
        if attrs_found:
            l = len(attrs_found)
            print(f'{l} electrical attribute{"s" if l > 1 else ""} found on lines {block_start}-{i + 1}'
                  f' of AND condition: {", ".join(attrs_found)}')
        else:
            print(f'No electrical attribute found on lines {block_start}-{i + 1}'
                  f' of AND condition')
EXAMPLE
# Sample input: two top-level AND blocks; the first one contains a nested
# OR block that in turn holds two inner AND blocks.
test='''
{AND
condition_1:batterie
condition_2:samsung
{OR
{AND
condition_1:TV
condition_2:LCD
}
{AND
condition_1:remote
condition_2:TV
}
}
}
{AND
condition_1:batterie
condition_2:samsung
{OR
condition_3:TV
condition_4:remote
}
}
'''
# Run the counter on the sample; the comments below presumably show the
# output printed for it.
count_conditions(test)
# 1 electrical attribute found on lines 1-14 of AND condition: batterie
# 1 electrical attribute found on lines 15-22 of AND condition: batterie
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论