通过正则表达式拆分字符串并获取匹配范围

huangapple go评论45阅读模式
英文:

Split string by regex and get matching range

问题

extension String {
    /// Splits the string on every match of a regular expression and pairs each
    /// piece with the range it occupies in the receiver.
    ///
    /// Pieces are the text between separator matches, so an empty piece is
    /// produced when a match starts exactly where the previous one ended (or at
    /// the start of the string). The remainder after the last match is always
    /// appended, even when it is empty.
    ///
    /// - Parameter regexStr: An `NSRegularExpression` pattern describing the separator.
    /// - Returns: `(piece, range)` tuples in order of appearance, such that
    ///   `String(self[range]) == piece`. Returns an empty array when `regexStr`
    ///   is not a valid pattern.
    func splitWithRegex(by regexStr: String) -> [(String, Range<String.Index>)] {
        guard let regex = try? NSRegularExpression(pattern: regexStr) else { return [] }
        let nsRange = NSRange(startIndex..., in: self)
        var index = startIndex
        var array = regex.matches(in: self, range: nsRange)
            .compactMap { match -> (String, Range<String.Index>)? in
                // Skip, rather than crash on, a match whose UTF-16 range cannot
                // be converted to String indices.
                guard let range = Range(match.range, in: self) else { return nil }
                // The reported range covers only the piece itself; it stops
                // where the separator begins instead of where it ends.
                let pieceRange = index..<range.lowerBound
                index = range.upperBound
                return (String(self[pieceRange]), pieceRange)
            }
        array.append((String(self[index...]), index..<endIndex))
        return array
    }
}
英文:
extension String {
	/// Splits the string on every match of a regular expression.
	///
	/// The text between consecutive matches is returned in order, followed by
	/// whatever remains after the last match. Empty pieces are kept.
	///
	/// - Parameter regexStr: An `NSRegularExpression` pattern describing the separator.
	/// - Returns: The pieces between separator matches, or an empty array when
	///   `regexStr` is not a valid pattern.
	func splitWithRegex(by regexStr: String) -> [String] {
		guard let regex = try? NSRegularExpression(pattern: regexStr) else { return [] }
		let nsRange = NSRange(startIndex..., in: self)
		var index = startIndex
		var array = regex.matches(in: self, range: nsRange)
			.compactMap { match -> String? in
				// Skip, rather than crash on, a match whose UTF-16 range
				// cannot be converted to String indices.
				guard let range = Range(match.range, in: self) else { return nil }
				let result = self[index..<range.lowerBound]
				index = range.upperBound
				return String(result)
			}
		array.append(String(self[index...]))
		return array
	}
}

I use the above code to split a string in Swift using a regex pattern and return an array of strings.

How can I return an array of [(String, Range)] instead, so that I can use each split substring as a key and its range as the value?

For example:

// Sample input: words separated by single spaces.
let string = "This is a string"
// Split on runs of whitespace.
let stringRange = string.splitWithRegex(by: "\\s+")

Result:

(This, 0...4)
(is, 5...7)
(a, 8...9)
(string, 10...16)

答案1

得分: 1

下面是代码的翻译部分:

extension String {
    /// Splits the string on every match of a regular expression and pairs each
    /// non-empty piece with the range it occupies in the receiver.
    ///
    /// Empty pieces are dropped, so leading, trailing, or adjacent separators
    /// do not produce entries in the result.
    ///
    /// - Parameter regexStr: An `NSRegularExpression` pattern describing the separator.
    /// - Returns: `(piece, range)` tuples in order of appearance, such that
    ///   `String(self[range]) == piece`. Returns an empty array when `regexStr`
    ///   is not a valid pattern.
    func splitWithRegex2(by regexStr: String) -> [(String, Range<String.Index>)] {
        guard let regex = try? NSRegularExpression(pattern: regexStr) else { return [] }
        let nsRange = NSRange(startIndex..., in: self)
        var index = startIndex
        var array = regex.matches(in: self, range: nsRange)
            .compactMap { match -> (String, Range<String.Index>)? in
                guard let range = Range(match.range, in: self) else { return nil }
                let resultRange = index..<range.lowerBound
                // Advance past the separator before any early exit, so the
                // next piece starts after this match even when this piece is dropped.
                index = range.upperBound
                // Drops the empty piece produced when a match starts exactly
                // where the previous one ended (e.g. a separator at the very
                // start of the string).
                guard !resultRange.isEmpty else { return nil }
                return (String(self[resultRange]), resultRange)
            }

        // Only append the remainder when there is text after the last match;
        // otherwise a trailing separator would add an empty entry.
        if index < endIndex {
            array.append((String(self[index...]), index..<endIndex))
        }
        return array
    }
}

要玩:

// Input with a double space between the first two words.
let stringReg1 = "This  is a string"
let stringRange1 = stringReg1.splitWithRegex2(by: "\\s+")
print(stringRange1)
// Print each piece with UTF-16 offsets, which are easier to read than the
// raw String.Index values printed above.
for (piece, pieceRange) in stringRange1 {
    let nsRange = NSRange(pieceRange, in: stringReg1)
    let upperOffset = nsRange.location + nsRange.length
    print("\(piece), \(nsRange.location)...\(upperOffset)")
}

输出:

[("This", Range(Swift.String.Index(_rawBits: 1)..<Swift.String.Index(_rawBits: 262144))), ("is", Range(Swift.String.Index(_rawBits: 393216)..<Swift.String.Index(_rawBits: 524288))), ("a", Range(Swift.String.Index(_rawBits: 589824)..<Swift.String.Index(_rawBits: 655360))), ("string", Range(Swift.String.Index(_rawBits: 720896)..<Swift.String.Index(_rawBits: 1114113)))]
This, 0...4
is, 6...8
a, 9...10
string, 11...17
// Input with leading and trailing whitespace as well as a double space.
let stringReg2 = "    This  is a string   "
let stringRange2 = stringReg2.splitWithRegex2(by: "\\s+")
print(stringRange2)
// Print each piece with UTF-16 offsets, which are easier to read than the
// raw String.Index values printed above.
for (piece, pieceRange) in stringRange2 {
    let nsRange = NSRange(pieceRange, in: stringReg2)
    let upperOffset = nsRange.location + nsRange.length
    print("\(piece), \(nsRange.location)...\(upperOffset)")
}

输出:

[("This", Range(Swift.String.Index(_rawBits: 262144)..<Swift.String.Index(_rawBits: 524288))), ("is", Range(Swift.String.Index(_rawBits: 655360)..<Swift.String.Index(_rawBits: 786432))), ("a", Range(Swift.String.Index(_rawBits: 851968)..<Swift.String.Index(_rawBits: 917504))), ("string", Range(Swift.String.Index(_rawBits: 983040)..<Swift.String.Index(_rawBits: 1376256)))]
This, 4...8
is, 10...12
a, 13...14
string, 15...21

这是你提供的代码的中文翻译。如果你需要进一步的帮助,请随时告诉我。

英文:

I guess that should do the trick, but I'm not totally sure:

extension String {
    /// Splits the string on every match of a regular expression and pairs each
    /// non-empty piece with the range it occupies in the receiver.
    ///
    /// Empty pieces are dropped, so leading, trailing, or adjacent separators
    /// do not produce entries in the result.
    ///
    /// - Parameter regexStr: An `NSRegularExpression` pattern describing the separator.
    /// - Returns: `(piece, range)` tuples in order of appearance, such that
    ///   `String(self[range]) == piece`. Returns an empty array when `regexStr`
    ///   is not a valid pattern.
    func splitWithRegex2(by regexStr: String) -> [(String, Range<String.Index>)] {
        guard let regex = try? NSRegularExpression(pattern: regexStr) else { return [] }
        let nsRange = NSRange(startIndex..., in: self)
        var index = startIndex
        var array = regex.matches(in: self, range: nsRange)
            .compactMap { match -> (String, Range<String.Index>)? in
                guard let range = Range(match.range, in: self) else { return nil }
                let resultRange = index..<range.lowerBound
                // Advance past the separator before any early exit, so the
                // next piece starts after this match even when this piece is dropped.
                index = range.upperBound
                // Fixes the case where the first match starts the string
                // (e.g. leading whitespace), which would otherwise yield an
                // empty piece.
                guard !resultRange.isEmpty else { return nil }
                return (String(self[resultRange]), resultRange)
            }

        // Only append the remainder when there is text after the last match;
        // otherwise a trailing separator would add an empty entry.
        if index < endIndex {
            array.append((String(self[index...]), index..<endIndex))
        }
        return array
    }
}

To play:

// Input with a double space between the first two words.
let stringReg1 = "This  is a string"
let stringRange1 = stringReg1.splitWithRegex2(by: "\\s+")
print(stringRange1)
// More user-friendly than Range(Swift.String.Index(_rawBits: 65536)..<Swift.String.Index(_rawBits: 327680)):
// convert each range to UTF-16 offsets before printing.
stringRange1.forEach {
    let nsRange = NSRange($0.1, in: stringReg1)
    print("\($0.0), \(nsRange.location)...\(nsRange.location + nsRange.length)")
}

Output:

$>[("This", Range(Swift.String.Index(_rawBits: 1)..<Swift.String.Index(_rawBits: 262144))), ("is", Range(Swift.String.Index(_rawBits: 393216)..<Swift.String.Index(_rawBits: 524288))), ("a", Range(Swift.String.Index(_rawBits: 589824)..<Swift.String.Index(_rawBits: 655360))), ("string", Range(Swift.String.Index(_rawBits: 720896)..<Swift.String.Index(_rawBits: 1114113)))]
$>This, 0...4
$>is, 6...8
$>a, 9...10
$>string, 11...17
// Input with leading and trailing whitespace as well as a double space.
let stringReg2 = "    This  is a string   "
let stringRange2 = stringReg2.splitWithRegex2(by: "\\s+")
print(stringRange2)
// More user-friendly than Range(Swift.String.Index(_rawBits: 65536)..<Swift.String.Index(_rawBits: 327680)):
// convert each range to UTF-16 offsets before printing.
stringRange2.forEach {
    let nsRange = NSRange($0.1, in: stringReg2)
    print("\($0.0), \(nsRange.location)...\(nsRange.location + nsRange.length)")
}

Output:

$>[("This", Range(Swift.String.Index(_rawBits: 262144)..<Swift.String.Index(_rawBits: 524288))), ("is", Range(Swift.String.Index(_rawBits: 655360)..<Swift.String.Index(_rawBits: 786432))), ("a", Range(Swift.String.Index(_rawBits: 851968)..<Swift.String.Index(_rawBits: 917504))), ("string", Range(Swift.String.Index(_rawBits: 983040)..<Swift.String.Index(_rawBits: 1376256)))]
$>This, 4...8
$>is, 10...12
$>a, 13...14
$>string, 15...21

huangapple
  • 本文由 发表于 2023年2月10日 02:47:32
  • 转载请务必保留本文链接:https://go.coder-hub.com/75403133.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定