如何找到两个字符串切片之间的差异?

huangapple go评论96阅读模式
英文:

How to find the difference between two slices of strings

问题

这是我期望的结果

slice1 := []string{"foo", "bar", "hello"}
slice2 := []string{"foo", "bar"}

difference(slice1, slice2)
=> ["hello"]

我正在寻找这两个字符串切片之间的差异!

英文:

Here is my desired outcome

slice1 := []string{"foo", "bar","hello"}
slice2 := []string{"foo", "bar"}

difference(slice1, slice2)
=> ["hello"]

I am looking for the difference between the two string slices!

答案1

得分: 81

假设Go的映射(maps)的时间复杂度为~O(1),这里是一个在未排序切片上工作的~O(n)差异函数。

// difference返回`a`中不在`b`中的元素。
func difference(a, b []string) []string {
    mb := make(map[string]struct{}, len(b))
    for _, x := range b {
        mb[x] = struct{}{}
    }
    var diff []string
    for _, x := range a {
        if _, found := mb[x]; !found {
            diff = append(diff, x)
        }
    }
    return diff
}

这个函数通过创建一个映射(map)mb来存储切片b中的元素,并遍历切片a,将不在mb中的元素添加到结果切片diff中,最后返回diff。这样就可以找到a中不在b中的元素。

英文:

Assuming Go maps are ~O(1), here is an ~O(n) difference function that works on unsorted slices.

// difference returns the elements in `a` that aren't in `b`.
func difference(a, b []string) []string {
    mb := make(map[string]struct{}, len(b))
    for _, x := range b {
        mb[x] = struct{}{}
    }
    var diff []string
    for _, x := range a {
        if _, found := mb[x]; !found {
            diff = append(diff, x)
        }
    }
    return diff
}

答案2

得分: 25

根据切片的大小,可能有不同的最佳解决方案。

我的答案假设顺序不重要。

使用简单循环,仅适用于较小的切片:

package main

import "fmt"

func difference(slice1 []string, slice2 []string) []string {
    var diff []string

    // 循环两次,第一次查找不在slice2中的slice1字符串,
    // 第二次循环查找不在slice1中的slice2字符串
    for i := 0; i < 2; i++ {
        for _, s1 := range slice1 {
            found := false
            for _, s2 := range slice2 {
                if s1 == s2 {
                    found = true
                    break
                }
            }
            // 字符串未找到。将其添加到返回的切片中
            if !found {
                diff = append(diff, s1)
            }
        }
        // 仅在第一次循环时交换切片
        if i == 0 {
            slice1, slice2 = slice2, slice1
        }
    }

    return diff
}

func main() {
    slice1 := []string{"foo", "bar", "hello"}
    slice2 := []string{"foo", "world", "bar", "foo"}

    fmt.Printf("%+v\n", difference(slice1, slice2))
}

输出结果:

[hello world]

Playground: http://play.golang.org/p/KHTmJcR4rg

英文:

Depending on the size of the slices, different solutions might be best.

My answer assumes order doesn't matter.

Using simple loops, only to be used with smaller slices:

package main

import &quot;fmt&quot;

func difference(slice1 []string, slice2 []string) []string {
	var diff []string

	// Loop two times, first to find slice1 strings not in slice2,
	// second loop to find slice2 strings not in slice1
	for i := 0; i &lt; 2; i++ {
		for _, s1 := range slice1 {
			found := false
			for _, s2 := range slice2 {
				if s1 == s2 {
					found = true
					break
				}
			}
			// String not found. We add it to return slice
			if !found {
				diff = append(diff, s1)
			}
		}
		// Swap the slices, only if it was the first loop
		if i == 0 {
			slice1, slice2 = slice2, slice1
		}
	}

	return diff
}

func main() {
	slice1 := []string{&quot;foo&quot;, &quot;bar&quot;, &quot;hello&quot;}
	slice2 := []string{&quot;foo&quot;, &quot;world&quot;, &quot;bar&quot;, &quot;foo&quot;}

	fmt.Printf(&quot;%+v\n&quot;, difference(slice1, slice2))
}

Output:

[hello world]

Playground: http://play.golang.org/p/KHTmJcR4rg

答案3

得分: 15

我使用地图来解决这个问题

package main

import "fmt"

func main() {
	slice1 := []string{"foo", "bar","hello"}
	slice2 := []string{"foo", "bar","world"}

	diffStr := difference(slice1, slice2)

	for _, diffVal := range diffStr {
		fmt.Println(diffVal)
	}

}

func difference(slice1 []string, slice2 []string) ([]string){
	diffStr := []string{}
	m :=map [string]int{}

	for _, s1Val := range slice1 {
		m[s1Val] = 1
	}
	for _, s2Val := range slice2 {
		m[s2Val] = m[s2Val] + 1
	}

	for mKey, mVal := range m {
		if mVal==1 {
			diffStr = append(diffStr, mKey)
		}
	}

	return diffStr
}

输出:
hello
world

英文:

I use the map to solve this problem

package main

import &quot;fmt&quot;

func main() {
	slice1 := []string{&quot;foo&quot;, &quot;bar&quot;,&quot;hello&quot;}
	slice2 := []string{&quot;foo&quot;, &quot;bar&quot;,&quot;world&quot;}

	diffStr := difference(slice1, slice2)

	for _, diffVal := range diffStr {
		fmt.Println(diffVal)
	}

}

func difference(slice1 []string, slice2 []string) ([]string){
	diffStr := []string{}
	m :=map [string]int{}

	for _, s1Val := range slice1 {
		m[s1Val] = 1
	}
	for _, s2Val := range slice2 {
		m[s2Val] = m[s2Val] + 1
	}

	for mKey, mVal := range m {
		if mVal==1 {
			diffStr = append(diffStr, mKey)
		}
	}

	return diffStr
}

output:
hello
world

答案4

得分: 3

func diff(a, b []string) []string {
    temp := map[string]int{}
    for _, s := range a {
        temp[s]++
    }
    for _, s := range b {
        temp[s]--
    }

    var result []string
    for s, v := range temp {
        if v != 0 {
            result = append(result, s)
        }
    }
    return result
}

如果你想处理重复的字符串可以使用 map 中的 `v` 来实现你可以选择 `a.Remove(b)``v>0` `b.Remove(a)``v<0`)。
英文:
func diff(a, b []string) []string {
    temp := map[string]int{}
    for _, s := range a {
        temp
展开收缩
++ } for _, s := range b { temp
展开收缩
-- } var result []string for s, v := range temp { if v != 0 { result = append(result, s) } } return result }

If you want to handle duplicated strings, the v in the map can do that. And you can pick a.Remove(b) ( v&gt;0 ) or b.Remove(a) (v&lt;0)

答案5

得分: 2

func unique(slice []string) []string {
    encountered := map[string]int{}
    diff := []string{}

    for _, v := range slice {
        encountered[v] = encountered[v]+1
    }

    for _, v := range slice {
        if encountered[v] == 1 {
            diff = append(diff, v)
        }
    }
    return diff
}

func main() {
    slice1 := []string{"hello", "michael", "dorner"}
    slice2 := []string{"hello", "michael"}
    slice3 := []string{}
    fmt.Println(unique(append(slice1, slice2...))) // [dorner]
    fmt.Println(unique(append(slice2, slice3...))) // [michael michael]
}
func unique(slice []string) []string {
    encountered := map[string]int{}
    diff := []string{}

    for _, v := range slice {
        encountered[v] = encountered[v]+1
    }

    for _, v := range slice {
        if encountered[v] == 1 {
            diff = append(diff, v)
        }
    }
    return diff
}

func main() {
    slice1 := []string{"hello", "michael", "dorner"}
    slice2 := []string{"hello", "michael"}
    slice3 := []string{}
    fmt.Println(unique(append(slice1, slice2...))) // [dorner]
    fmt.Println(unique(append(slice2, slice3...))) // [michael michael]
}

这是一个用于去除字符串切片中重复元素的函数。在unique函数中,首先创建了一个encountered的map,用于记录每个元素出现的次数。然后遍历切片,将每个元素的出现次数记录在encountered中。接着再次遍历切片,如果元素的出现次数为1,则将其添加到diff切片中。最后返回diff切片。

main函数中,定义了三个字符串切片slice1slice2slice3。通过调用unique函数,将slice1slice2合并后去除重复元素,并打印结果。然后将slice2slice3合并后去除重复元素,并打印结果。

英文:
func unique(slice []string) []string {
    encountered := map[string]int{}
    diff := []string{}

    for _, v := range slice {
        encountered[v] = encountered[v]+1
    }

    for _, v := range slice {
        if encountered[v] == 1 {
	    diff = append(diff, v)
        }
    }
    return diff
}

func main() {
	slice1 := []string{&quot;hello&quot;, &quot;michael&quot;, &quot;dorner&quot;}
	slice2 := []string{&quot;hello&quot;, &quot;michael&quot;}
	slice3 := []string{}
	fmt.Println(unique(append(slice1, slice2...))) // [dorner]
	fmt.Println(unique(append(slice2, slice3...))) // [michael michael]
}

答案6

得分: 1

根据ANisus提到的,不同的方法适用于不同大小的输入切片。这个解决方案将在O(n)的线性时间内工作,独立于输入大小,但假设“相等”包括索引位置。

因此,在OP的示例中:

slice1 := []string{"foo", "bar","hello"}
slice2 := []string{"foo", "bar"}

条目foobar之间的相等性不仅取决于值,还取决于它们在切片中的索引。

在这些条件下,你可以这样做:

package main

import "fmt"

func difference(s1, s2 []string) string {
    var (
        lenMin  int
        longest []string
        out     string
    )
    // 确定最短长度和最长切片
    if len(s1) < len(s2) {
        lenMin = len(s1)
        longest = s2
    } else {
        lenMin = len(s2)
        longest = s1
    }
    // 比较公共索引
    for i := 0; i < lenMin; i++ {
        if s1[i] != s2[i] {
            out += fmt.Sprintf("=>\t%s\t%s\n", s1[i], s2[i])
        }
    }
    // 添加不在公共索引中的元素
    for _, v := range longest[lenMin:] {
        out += fmt.Sprintf("=>\t%s\n", v)
    }
    return out
}

func main() {
    slice1 := []string{"foo", "bar", "hello"}
    slice2 := []string{"foo", "bar"}
    fmt.Print(difference(slice1, slice2))
}

输出结果为:

=> hello

Playground

如果你将切片更改为:

func main() {
    slice1 := []string{"foo", "baz", "hello"}
    slice2 := []string{"foo", "bar"}    
    fmt.Print(difference(slice1, slice2))
}

将会产生:

=> baz bar
=> hello

英文:

As mentioned by ANisus, different approaches will suit different sizes of input slices. This solution will work in linear time O(n) independent of input size, but assumes that the "equality" includes index position.

Thus, in the OP's examples of:

slice1 := []string{&quot;foo&quot;, &quot;bar&quot;,&quot;hello&quot;}
slice2 := []string{&quot;foo&quot;, &quot;bar&quot;}

The entries foo and bar are equal not just due to value, but also due to their index in the slice.

Given these conditions, you can do something like:

package main

import &quot;fmt&quot;

func difference(s1, s2 []string) string {
	var (
		lenMin  int
		longest []string
		out     string
	)
	// Determine the shortest length and the longest slice
	if len(s1) &lt; len(s2) {
		lenMin = len(s1)
		longest = s2
	} else {
		lenMin = len(s2)
		longest = s1
	}
	// compare common indeces
	for i := 0; i &lt; lenMin; i++ {
		if s1[i] != s2[i] {
			out += fmt.Sprintf(&quot;=&gt;\t%s\t%s\n&quot;, s1[i], s2[i])
		}
	}
	// add indeces not in common
	for _, v := range longest[lenMin:] {
		out += fmt.Sprintf(&quot;=&gt;\t%s\n&quot;, v)
	}
	return out
}

func main() {
	slice1 := []string{&quot;foo&quot;, &quot;bar&quot;, &quot;hello&quot;}
	slice2 := []string{&quot;foo&quot;, &quot;bar&quot;}
	fmt.Print(difference(slice1, slice2))
}

Produces:

>> => hello

Playground

If you change the slices to be:

func main() {
	slice1 := []string{&quot;foo&quot;, &quot;baz&quot;, &quot;hello&quot;}
	slice2 := []string{&quot;foo&quot;, &quot;bar&quot;}	
	fmt.Print(difference(slice1, slice2))
}

It will produce:

>> => baz bar
=> hello

答案7

得分: 1

大多数其他解决方案在切片包含重复元素时无法返回正确的答案。

如果切片已经排序,这个解决方案的时间复杂度是O(n),空间复杂度是O(n);如果切片未排序,时间复杂度是O(n*log(n)),空间复杂度是O(n),但它具有实际正确的属性。😃

func diff(a, b []string) []string {
	a = sortIfNeeded(a)
	b = sortIfNeeded(b)
	var d []string
	i, j := 0, 0
	for i < len(a) && j < len(b) {
		c := strings.Compare(a[i], b[j])
		if c == 0 {
			i++
			j++
		} else if c < 0 {
			d = append(d, a[i])
			i++
		} else {
			d = append(d, b[j])
			j++
		}
	}
	d = append(d, a[i:len(a)]...)
	d = append(d, b[j:len(b)]...)
	return d
}

func sortIfNeeded(a []string) []string {
	if sort.StringsAreSorted(a) {
		return a
	}
	s := append(a[:0:0], a...)
	sort.Strings(s)
	return s
}

如果你确定切片已经排序,可以删除对sortIfNeeded的调用(在sortIfNeeded中进行防御性切片复制的原因是因为排序是原地进行的,所以我们会修改传递给diff的切片)。

请参阅https://play.golang.org/p/lH-5L0aL1qr,其中显示了面对重复条目时的正确性测试。

英文:

Most of the other solutions here will fail to return the correct answer in case the slices contain duplicated elements.

This solution is O(n) time and O(n) space if the slices are already sorted, and O(n*log(n)) time O(n) space if they are not, but has the nice property of actually being correct. 🤣

func diff(a, b []string) []string {
	a = sortIfNeeded(a)
	b = sortIfNeeded(b)
	var d []string
	i, j := 0, 0
	for i &lt; len(a) &amp;&amp; j &lt; len(b) {
		c := strings.Compare(a[i], b[j])
		if c == 0 {
			i++
			j++
		} else if c &lt; 0 {
			d = append(d, a[i])
			i++
		} else {
			d = append(d, b[j])
			j++
		}
	}
	d = append(d, a[i:len(a)]...)
	d = append(d, b[j:len(b)]...)
	return d
}

func sortIfNeeded(a []string) []string {
	if sort.StringsAreSorted(a) {
		return a
	}
	s := append(a[:0:0], a...)
	sort.Strings(s)
	return s
}

If you know for sure that the slices are already sorted, you can remove the calls to sortIfNeeded (the reason for the defensive slice copy in sortIfNeeded is because sorting is done in-place, so we would be modifying the slices that are passed to diff).

See https://play.golang.org/p/lH-5L0aL1qr for tests showing correctness in face of duplicated entries.

答案8

得分: 1

我有这个例子,但它只适用于第一个数组中的元素在第二个数组中“不存在”的情况。

使用泛型:

type HandleDiff[T comparable] func(item1 T, item2 T) bool

func HandleDiffDefault[T comparable](val1 T, val2 T) bool {
	return val1 == val2
}

func Diff[T comparable](items1 []T, items2 []T, callback HandleDiff[T]) []T {
	acc := []T{}
	for _, item1 := range items1 {
		find := false
		for _, item2 := range items2 {
			if callback(item1, item2) {
				find = true
				break
			}
		}
		if !find {
			acc = append(acc, item1)
		}
	}
	return acc
}

用法:

diff := Diff(items1, items2, HandleDiffDefault[string])
英文:

I have this example but it works only for the elements of the first array "not present" in the second array

with generics

type HandleDiff[T comparable] func(item1 T, item2 T) bool

func HandleDiffDefault[T comparable](val1 T, val2 T) bool {
	return val1 == val2
}

func Diff[T comparable](items1 []T, items2 []T, callback HandleDiff[T]) []T {
	acc := []T{}
	for _, item1 := range items1 {
		find := false
		for _, item2 := range items2 {
			if callback(item1, item2) {
				find = true
				break
			}
		}
		if !find {
			acc = append(acc, item1)
		}
	}
	return acc
}

usage

diff := Diff(items1, items2, HandleDiffDefault[string])

答案9

得分: 0

我会为@peterwilliams97的解决方案添加一个小改动,以便我们可以忽略输入的顺序。

func difference(a, b []string) []string {
	// 重新排序输入,
	// 这样我们可以检查较长的切片而不是较短的切片
	longer, shorter := a, b
	if len(b) > len(a) {
		longer, shorter = b, a
	}

	mb := make(map[string]struct{}, len(shorter))
	for _, x := range shorter {
		mb[x] = struct{}{}
	}
	var diff []string
	for _, x := range longer {
		if _, found := mb[x]; !found {
			diff = append(diff, x)
		}
	}
	return diff
}
英文:

I would add a small change to the solution by @peterwilliams97, so that we can ignore the order of the input.

func difference(a, b []string) []string {
	// reorder the input,
	// so that we can check the longer slice over the shorter one
	longer, shorter := a, b
	if len(b) &gt; len(a) {
		longer, shorter = b, a
	}

	mb := make(map[string]struct{}, len(shorter))
	for _, x := range shorter {
		mb[x] = struct{}{}
	}
	var diff []string
	for _, x := range longer {
		if _, found := mb[x]; !found {
			diff = append(diff, x)
		}
	}
	return diff
}

答案10

得分: 0

func difference(s1, s2 []string) []string {
	combinedSlice := append(s1, s2...)
	dm := make(map[string]int)
	for _, v := range combinedSlice {
		if _, ok := dm[v]; ok {
			// 后面删除该元素,因为它在两个切片中都存在。
			dm[v] += 1
			continue
		}
		// 新的条目,添加到映射中!
		dm[v] = 1
	}
	var retSlice []string
	for k, v := range dm {
		if v == 1 {
			retSlice = append(retSlice, k)
		}
	}
	return retSlice
}

// 如果我们需要从第一个切片中获取差异,请使用以下函数。

// 输出: [sweet]

func diff(s1, s2 []string) []string {
	mp1 := make(map[string]bool)
	for _, v := range s2 {
		mp1[v] = true
	}
	var dif []string
	for _, v1 := range s1 {
		if _, ok := mp1[v1]; !ok {
			dif = append(dif, v1)
		}
	}
	return dif
}
英文:

Input: s1 = ["this", "apple", "is", "sweet"], s2 = ["this" "apple" "is" "sour"]

Output: ["sweet","sour"]

func difference(s1, s2 []string) []string {
	combinedSlice := append(s1, s2...)
	dm := make(map[string]int)
	for _, v := range combinedSlice {
		if _, ok := dm[v]; ok {
			// remove element later as it exist in both slice.
			dm[v] += 1
			continue
		}
		// new entry, add in map!
		dm[v] = 1
	}
	var retSlice []string
	for k, v := range dm {
		if v == 1 {
			retSlice = append(retSlice, k)
		}
	}
	return retSlice
}

// If we needs difference from first slice use below funcion.

// Output: [sweet]

func diff(s1, s2 []string) []string {
	mp1 := make(map[string]bool)
	for _, v := range s2 {
		mp1[v] = true
	}
	var dif []string
	for _, v1 := range s1 {
		if _, ok := mp1[v1]; !ok {
			dif = append(dif, v1)
		}
	}
	return dif
}

答案11

得分: -1

下面的代码给出了字符串之间的绝对差异,不考虑顺序。空间复杂度为O(n),时间复杂度为O(n)。

// difference返回a中不在b中的元素
func difference(a, b string) string {
    longest, shortest := longestString(&a, &b)
    var builder strings.Builder
    var mem = make(map[rune]bool)
    for _, s := range longest {
        mem[s] = true
    }
    for _, s := range shortest {
        if _, ok := mem[s]; ok {
            mem[s] = false
        }
    }
    for k, v := range mem {
        if v == true {
            builder.WriteRune(k)
        }
    }
    return builder.String()
}

func longestString(a *string, b *string) ([]rune, []rune) {
    if len(*a) > len(*b) {
        return []rune(*a), []rune(*b)
    }
    return []rune(*b), []rune(*a)
}

以上是代码的翻译结果。

英文:

The code below gives the absolute difference between strings regardless of the order. Space complexity O(n) and Time complexity O(n).

// difference returns the elements in a that aren&#39;t in b
func difference(a, b string) string {
	longest, shortest := longestString(&amp;a, &amp;b)
	var builder strings.Builder
	var mem = make(map[rune]bool)
	for _, s := range longest {
		mem
展开收缩
= true } for _, s := range shortest { if _, ok := mem
展开收缩
; ok { mem
展开收缩
= false } } for k, v := range mem { if v == true { builder.WriteRune(k) } } return builder.String() } func longestString(a *string, b *string) ([]rune, []rune) { if len(*a) &gt; len(*b) { return []rune(*a), []rune(*b) } return []rune(*b), []rune(*a) }

huangapple
  • 本文由 发表于 2013年10月15日 13:59:28
  • 转载请务必保留本文链接:https://go.coder-hub.com/19374219.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定