How can I extract specific parts from a dynamic input and then put them back together?
# Pieces the asker wants to obtain from the sample commands: command names,
# their arguments, the paths involved, and any piped or chained extra commands.
cmd1 = "dir"
args1 = "/a"
path = "c:\\users\\"
cmd2 = '| find "stuff"'
cmd3 = "more"
path2 = "c:\\windows\\win.ini"
cmd4 = '| findstr "fonts"'
cmd5 = "certutil.exe"
args2 = "-encode \\dir\\file"
extra_cmds = "& something"
Given a command as input, I would like to use Python to extract separately the command, the path involved, the arguments, and any extra concatenated commands, so that I can work with each piece individually if needed and then put them all back together.
so, for example:
dir /a c:\users\ | find "stuff"
# Expected breakdown of the example command shown above.
cmd1 = "dir"
args1 = "/a"
path = "c:\\users\\"
cmd2 = '| find "stuff"'
This would be an easy task if the commands were always the same or followed the same patterns, but how can I do it when they are always different and use different syntax?
some more examples:
more c:\windows\win.ini | findstr "fonts"
certutil.exe -encode \dir\file & something
得分: 1
import re
# Sample command lines (Windows cmd and POSIX shell flavours) used to exercise
# the parser: pipes, chaining operators, redirections, quoting and ^-escapes.
examples = [
    'dir /a c:\\users\\ | find "stuff"',
    'more c:\\windows\\win.ini | findstr "fonts"',
    'certutil.exe -encode \\dir\\file & something',
    'command -option1 -option2 -option3',
    'command "path with spaces" -option',
    'command > output.txt',
    'command "path with \'nested\' quotes" [optional argument]',
    'echo "This is a test" | find /v "exclude"',
    'python script.py arg1 arg2 --option',
    'echo $HOME',
    # Backslash doubled: "\." is an invalid escape sequence in a normal string.
    'dir /s /b | findstr /r "\\.txt$"',
    'command1 | command2 | command3',
    'command1 && command2 || command3',
    'command1 | command2 > output.txt 2>&1',
    'command arg1^&arg2 arg3',
    'echo ^<output^>',
    'command1 arg1 | command2 arg2',
    'command -option="value"',
    'command --long-option arg1 arg2',
    'command [optional argument] -option=value',
    'command arg1 ^& command2 arg2',
    'command "path with spaces" | find "search phrase"',
    'python -c "print(2 + 2)"',
    'curl -X POST -d @data.txt http://example.com/api',
    'grep -r "pattern" /path/to/search --exclude-dir=dir',
    'ls -l | awk \'{print $1, $2}\'',
    'command < input.txt',
    'command arg1 -f file.txt',
    'command "string with $special$ chars"',
    'command --verbose --debug',
    'command arg1 ^> output.txt',
    'command [optional argument1] --flag --option=value [optional argument2]',
]
import re
# Separators between chained commands: " | ", " & ", "&&" and "||".
_SEPARATOR_RE = re.compile(r" \| | & |&&|\|\|")
# A token is a path when it starts with a drive letter plus ":\" or with "\".
_PATH_RE = re.compile(r"^[a-zA-Z]:\\|^\\")
_QUOTES = ("'", '"')


def process_command(command):
    """Print a token-by-token breakdown of a shell command line.

    The line is split into sub-commands on " | ", " & ", "&&" and "||".
    For each sub-command the first token is printed as ``Command`` and every
    following token is classified, in this order of precedence, as:

    * ``Path``     - starts with ``X:\\`` or with a backslash;
    * ``Option N`` - starts with ``/`` or ``-``;
    * ``String``   - a quoted token, or a run of tokens from an opening
      quote up to the first token ending with the same quote character;
    * ``Output`` / ``Input`` - the token following a lone ``>`` / ``<``;
    * ``Argument N`` - anything else.

    Args:
        command: The raw command line to analyse.

    Returns:
        None. The breakdown is written to standard output.

    Note:
        Separators are matched textually, so a " | " inside a quoted string
        still splits the command.
    """
    for segment in _SEPARATOR_RE.split(command):
        # Whitespace split drops the empty tokens left around "&&" / "||".
        tokens = segment.split()
        if not tokens:
            continue
        print(f"Command: {tokens[0]}")

        string_parts = []
        quote_mark = None  # set while inside a multi-token quoted string
        arg_number = 0
        option_number = 0
        i = 1
        while i < len(tokens):
            token = tokens[i]
            if quote_mark is not None:
                # Collect every token of the quoted string, closing one included.
                string_parts.append(token)
                if token[-1] == quote_mark:
                    print(f"String: {' '.join(string_parts)}")
                    string_parts = []
                    quote_mark = None
            elif _PATH_RE.match(token):
                print(f"Path: {token}")
            elif token.startswith(("/", "-")):
                option_number += 1
                print(f"Option {option_number}: {token}")
            elif len(token) >= 2 and token[0] in _QUOTES and token[-1] in _QUOTES:
                print(f"String: {token}")
            elif token[0] in _QUOTES:
                # Opening token of a quoted string that contains spaces.
                quote_mark = token[0]
                string_parts = [token]
            elif token in (">", "<") and i + 1 < len(tokens):
                label = "Output" if token == ">" else "Input"
                print(f"{label}: {tokens[i + 1]}")
                i += 1  # the redirection target has been consumed
            else:
                # Also reached by a dangling ">" / "<" that has no target.
                arg_number += 1
                print(f"Argument {arg_number}: {token}")
            i += 1

        if string_parts:
            # Unterminated quote: report what was collected instead of dropping it.
            print(f"String: {' '.join(string_parts)}")
# Run the parser over every sample command. A plain loop is used because the
# calls are made only for their printed output, not to build a list.
for command in examples:
    process_command(command)
This was fun! Here is my attempt:
import re
# Sample command lines (Windows cmd and POSIX shell flavours) used to exercise
# the parser: pipes, chaining operators, redirections, quoting and ^-escapes.
examples = [
    'dir /a c:\\users\\ | find "stuff"',
    'more c:\\windows\\win.ini | findstr "fonts"',
    'certutil.exe -encode \\dir\\file & something',
    'command -option1 -option2 -option3',
    'command "path with spaces" -option',
    'command > output.txt',
    'command "path with \'nested\' quotes" [optional argument]',
    'echo "This is a test" | find /v "exclude"',
    'python script.py arg1 arg2 --option',
    'echo $HOME',
    # Backslash doubled: "\." is an invalid escape sequence in a normal string.
    'dir /s /b | findstr /r "\\.txt$"',
    'command1 | command2 | command3',
    'command1 && command2 || command3',
    'command1 | command2 > output.txt 2>&1',
    'command arg1^&arg2 arg3',
    'echo ^<output^>',
    'command1 arg1 ^| command2 arg2',
    'command -option="value"',
    'command --long-option arg1 arg2',
    'command [optional argument] -option=value',
    'command arg1 ^& command2 arg2',
    'command "path with spaces" ^| find "search phrase"',
    'python -c "print(2 + 2)"',
    'curl -X POST -d @data.txt http://example.com/api',
    'grep -r "pattern" /path/to/search --exclude-dir=dir',
    'ls -l | awk \'{print $1, $2}\'',
    'command < input.txt',
    'command arg1 -f file.txt',
    'command "string with $special$ chars"',
    'command --verbose --debug',
    'command arg1 ^> output.txt',
    'command [optional argument1] --flag --option=value [optional argument2]',
]
import re
# Separators between chained commands: " | ", " & ", "&&" and "||".
_SEPARATOR_RE = re.compile(r" \| | & |&&|\|\|")
# A token is a path when it starts with a drive letter plus ":\" or with "\".
_PATH_RE = re.compile(r"^[a-zA-Z]:\\|^\\")
_QUOTES = ("'", '"')


def process_command(command):
    """Print a token-by-token breakdown of a shell command line.

    The line is split into sub-commands on " | ", " & ", "&&" and "||".
    For each sub-command the first token is printed as ``Command`` and every
    following token is classified, in this order of precedence, as:

    * ``Path``     - starts with ``X:\\`` or with a backslash;
    * ``Option N`` - starts with ``/`` or ``-``;
    * ``String``   - a quoted token, or a run of tokens from an opening
      quote up to the first token ending with the same quote character;
    * ``Output`` / ``Input`` - the token following a lone ``>`` / ``<``;
    * ``Argument N`` - anything else.

    Escaped separators such as ``^|`` are not treated as separators because
    the pattern requires a space directly before the ``|`` or ``&``.

    Args:
        command: The raw command line to analyse.

    Returns:
        None. The breakdown is written to standard output.

    Note:
        Separators are matched textually, so a " | " inside a quoted string
        still splits the command.
    """
    for segment in _SEPARATOR_RE.split(command):
        # Whitespace split drops the empty tokens left around "&&" / "||".
        tokens = segment.split()
        if not tokens:
            continue
        print(f"Command: {tokens[0]}")

        string_parts = []
        quote_mark = None  # set while inside a multi-token quoted string
        arg_number = 0
        option_number = 0
        i = 1
        while i < len(tokens):
            token = tokens[i]
            if quote_mark is not None:
                # Collect every token of the quoted string, closing one included.
                string_parts.append(token)
                if token[-1] == quote_mark:
                    print(f"String: {' '.join(string_parts)}")
                    string_parts = []
                    quote_mark = None
            elif _PATH_RE.match(token):
                print(f"Path: {token}")
            elif token.startswith(("/", "-")):
                option_number += 1
                print(f"Option {option_number}: {token}")
            elif len(token) >= 2 and token[0] in _QUOTES and token[-1] in _QUOTES:
                print(f"String: {token}")
            elif token[0] in _QUOTES:
                # Opening token of a quoted string that contains spaces.
                quote_mark = token[0]
                string_parts = [token]
            elif token in (">", "<") and i + 1 < len(tokens):
                label = "Output" if token == ">" else "Input"
                print(f"{label}: {tokens[i + 1]}")
                i += 1  # the redirection target has been consumed
            else:
                # Also reached by a dangling ">" / "<" that has no target.
                arg_number += 1
                print(f"Argument {arg_number}: {token}")
            i += 1

        if string_parts:
            # Unterminated quote: report what was collected instead of dropping it.
            print(f"String: {' '.join(string_parts)}")
# Run the parser over every sample command. A plain loop is used because the
# calls are made only for their printed output, not to build a list.
for command in examples:
    process_command(command)
Leave a comment with more examples if it doesn't work for some input.