英文:
How to find the difference between two slices of strings
问题
这是我期望的结果
slice1 := []string{"foo", "bar", "hello"}
slice2 := []string{"foo", "bar"}
difference(slice1, slice2)
=> ["hello"]
我正在寻找这两个字符串切片之间的差异!
英文:
Here is my desired outcome
slice1 := []string{"foo", "bar","hello"}
slice2 := []string{"foo", "bar"}
difference(slice1, slice2)
=> ["hello"]
I am looking for the difference between the two string slices!
答案1
得分: 81
假设Go的映射(maps)的时间复杂度为~O(1),这里是一个在未排序切片上工作的~O(n)差异函数。
// difference返回`a`中不在`b`中的元素。
func difference(a, b []string) []string {
mb := make(map[string]struct{}, len(b))
for _, x := range b {
mb[x] = struct{}{}
}
var diff []string
for _, x := range a {
if _, found := mb[x]; !found {
diff = append(diff, x)
}
}
return diff
}
这个函数通过创建一个映射(map)mb
来存储切片b
中的元素,并遍历切片a
,将不在mb
中的元素添加到结果切片diff
中,最后返回diff
。这样就可以找到a
中不在b
中的元素。
英文:
Assuming Go maps are ~O(1), here is an ~O(n) difference function that works on unsorted slices.
// difference returns the elements in `a` that aren't in `b`.
func difference(a, b []string) []string {
mb := make(map[string]struct{}, len(b))
for _, x := range b {
mb[x] = struct{}{}
}
var diff []string
for _, x := range a {
if _, found := mb[x]; !found {
diff = append(diff, x)
}
}
return diff
}
答案2
得分: 25
根据切片的大小,可能有不同的最佳解决方案。
我的答案假设顺序不重要。
使用简单循环,仅适用于较小的切片:
package main
import "fmt"
func difference(slice1 []string, slice2 []string) []string {
var diff []string
// 循环两次,第一次查找不在slice2中的slice1字符串,
// 第二次循环查找不在slice1中的slice2字符串
for i := 0; i < 2; i++ {
for _, s1 := range slice1 {
found := false
for _, s2 := range slice2 {
if s1 == s2 {
found = true
break
}
}
// 字符串未找到。将其添加到返回的切片中
if !found {
diff = append(diff, s1)
}
}
// 仅在第一次循环时交换切片
if i == 0 {
slice1, slice2 = slice2, slice1
}
}
return diff
}
func main() {
slice1 := []string{"foo", "bar", "hello"}
slice2 := []string{"foo", "world", "bar", "foo"}
fmt.Printf("%+v\n", difference(slice1, slice2))
}
输出结果:
[hello world]
Playground: http://play.golang.org/p/KHTmJcR4rg
英文:
Depending on the size of the slices, different solutions might be best.
My answer assumes order doesn't matter.
Using simple loops, only to be used with smaller slices:
package main
import "fmt"
func difference(slice1 []string, slice2 []string) []string {
var diff []string
// Loop two times, first to find slice1 strings not in slice2,
// second loop to find slice2 strings not in slice1
for i := 0; i < 2; i++ {
for _, s1 := range slice1 {
found := false
for _, s2 := range slice2 {
if s1 == s2 {
found = true
break
}
}
// String not found. We add it to return slice
if !found {
diff = append(diff, s1)
}
}
// Swap the slices, only if it was the first loop
if i == 0 {
slice1, slice2 = slice2, slice1
}
}
return diff
}
func main() {
slice1 := []string{"foo", "bar", "hello"}
slice2 := []string{"foo", "world", "bar", "foo"}
fmt.Printf("%+v\n", difference(slice1, slice2))
}
Output:
[hello world]
Playground: http://play.golang.org/p/KHTmJcR4rg
答案3
得分: 15
我使用地图来解决这个问题
package main
import "fmt"
func main() {
slice1 := []string{"foo", "bar","hello"}
slice2 := []string{"foo", "bar","world"}
diffStr := difference(slice1, slice2)
for _, diffVal := range diffStr {
fmt.Println(diffVal)
}
}
func difference(slice1 []string, slice2 []string) ([]string){
diffStr := []string{}
m :=map [string]int{}
for _, s1Val := range slice1 {
m[s1Val] = 1
}
for _, s2Val := range slice2 {
m[s2Val] = m[s2Val] + 1
}
for mKey, mVal := range m {
if mVal==1 {
diffStr = append(diffStr, mKey)
}
}
return diffStr
}
输出:
hello
world
英文:
I use the map to solve this problem
package main
import "fmt"
func main() {
slice1 := []string{"foo", "bar","hello"}
slice2 := []string{"foo", "bar","world"}
diffStr := difference(slice1, slice2)
for _, diffVal := range diffStr {
fmt.Println(diffVal)
}
}
func difference(slice1 []string, slice2 []string) ([]string){
diffStr := []string{}
m :=map [string]int{}
for _, s1Val := range slice1 {
m[s1Val] = 1
}
for _, s2Val := range slice2 {
m[s2Val] = m[s2Val] + 1
}
for mKey, mVal := range m {
if mVal==1 {
diffStr = append(diffStr, mKey)
}
}
return diffStr
}
output:
hello
world
答案4
得分: 3
func diff(a, b []string) []string {
temp := map[string]int{}
for _, s := range a {
temp[s]++
}
for _, s := range b {
temp[s]--
}
var result []string
for s, v := range temp {
if v != 0 {
result = append(result, s)
}
}
return result
}
如果你想处理重复的字符串,可以使用 map 中的 `v` 来实现。你可以选择 `a.Remove(b)`(`v>0`)或 `b.Remove(a)`(`v<0`)。
英文:
func diff(a, b []string) []string {
temp := map[string]int{}
for _, s := range a {
temp展开收缩++
}
for _, s := range b {
temp展开收缩--
}
var result []string
for s, v := range temp {
if v != 0 {
result = append(result, s)
}
}
return result
}
If you want to handle duplicated strings, the v
in the map can do that. And you can pick a.Remove(b)
( v>0
) or b.Remove(a)
(v<0
)
答案5
得分: 2
func unique(slice []string) []string {
encountered := map[string]int{}
diff := []string{}
for _, v := range slice {
encountered[v] = encountered[v]+1
}
for _, v := range slice {
if encountered[v] == 1 {
diff = append(diff, v)
}
}
return diff
}
func main() {
slice1 := []string{"hello", "michael", "dorner"}
slice2 := []string{"hello", "michael"}
slice3 := []string{}
fmt.Println(unique(append(slice1, slice2...))) // [dorner]
fmt.Println(unique(append(slice2, slice3...))) // [michael michael]
}
func unique(slice []string) []string {
encountered := map[string]int{}
diff := []string{}
for _, v := range slice {
encountered[v] = encountered[v]+1
}
for _, v := range slice {
if encountered[v] == 1 {
diff = append(diff, v)
}
}
return diff
}
func main() {
slice1 := []string{"hello", "michael", "dorner"}
slice2 := []string{"hello", "michael"}
slice3 := []string{}
fmt.Println(unique(append(slice1, slice2...))) // [dorner]
fmt.Println(unique(append(slice2, slice3...))) // [michael michael]
}
这是一个用于去除字符串切片中重复元素的函数。在unique
函数中,首先创建了一个encountered
的map,用于记录每个元素出现的次数。然后遍历切片,将每个元素的出现次数记录在encountered
中。接着再次遍历切片,如果元素的出现次数为1,则将其添加到diff
切片中。最后返回diff
切片。
在main
函数中,定义了三个字符串切片slice1
、slice2
和slice3
。通过调用unique
函数,将slice1
和slice2
合并后去除重复元素,并打印结果。然后将slice2
和slice3
合并后去除重复元素,并打印结果。
英文:
func unique(slice []string) []string {
encountered := map[string]int{}
diff := []string{}
for _, v := range slice {
encountered[v] = encountered[v]+1
}
for _, v := range slice {
if encountered[v] == 1 {
diff = append(diff, v)
}
}
return diff
}
func main() {
slice1 := []string{"hello", "michael", "dorner"}
slice2 := []string{"hello", "michael"}
slice3 := []string{}
fmt.Println(unique(append(slice1, slice2...))) // [dorner]
fmt.Println(unique(append(slice2, slice3...))) // [michael michael]
}
答案6
得分: 1
根据ANisus提到的,不同的方法适用于不同大小的输入切片。这个解决方案将在O(n)
的线性时间内工作,独立于输入大小,但假设“相等”包括索引位置。
因此,在OP的示例中:
slice1 := []string{"foo", "bar","hello"}
slice2 := []string{"foo", "bar"}
条目foo
和bar
之间的相等性不仅取决于值,还取决于它们在切片中的索引。
在这些条件下,你可以这样做:
package main
import "fmt"
func difference(s1, s2 []string) string {
var (
lenMin int
longest []string
out string
)
// 确定最短长度和最长切片
if len(s1) < len(s2) {
lenMin = len(s1)
longest = s2
} else {
lenMin = len(s2)
longest = s1
}
// 比较公共索引
for i := 0; i < lenMin; i++ {
if s1[i] != s2[i] {
out += fmt.Sprintf("=>\t%s\t%s\n", s1[i], s2[i])
}
}
// 添加不在公共索引中的元素
for _, v := range longest[lenMin:] {
out += fmt.Sprintf("=>\t%s\n", v)
}
return out
}
func main() {
slice1 := []string{"foo", "bar", "hello"}
slice2 := []string{"foo", "bar"}
fmt.Print(difference(slice1, slice2))
}
输出结果为:
=> hello
如果你将切片更改为:
func main() {
slice1 := []string{"foo", "baz", "hello"}
slice2 := []string{"foo", "bar"}
fmt.Print(difference(slice1, slice2))
}
将会产生:
=> baz bar
=> hello
英文:
As mentioned by ANisus, different approaches will suit different sizes of input slices. This solution will work in linear time O(n)
independent of input size, but assumes that the "equality" includes index position.
Thus, in the OP's examples of:
slice1 := []string{"foo", "bar","hello"}
slice2 := []string{"foo", "bar"}
The entries foo
and bar
are equal not just due to value, but also due to their index in the slice.
Given these conditions, you can do something like:
package main
import "fmt"
func difference(s1, s2 []string) string {
var (
lenMin int
longest []string
out string
)
// Determine the shortest length and the longest slice
if len(s1) < len(s2) {
lenMin = len(s1)
longest = s2
} else {
lenMin = len(s2)
longest = s1
}
// compare common indeces
for i := 0; i < lenMin; i++ {
if s1[i] != s2[i] {
out += fmt.Sprintf("=>\t%s\t%s\n", s1[i], s2[i])
}
}
// add indeces not in common
for _, v := range longest[lenMin:] {
out += fmt.Sprintf("=>\t%s\n", v)
}
return out
}
func main() {
slice1 := []string{"foo", "bar", "hello"}
slice2 := []string{"foo", "bar"}
fmt.Print(difference(slice1, slice2))
}
Produces:
>> => hello
If you change the slices to be:
func main() {
slice1 := []string{"foo", "baz", "hello"}
slice2 := []string{"foo", "bar"}
fmt.Print(difference(slice1, slice2))
}
It will produce:
>> => baz bar
=> hello
答案7
得分: 1
大多数其他解决方案在切片包含重复元素时无法返回正确的答案。
如果切片已经排序,这个解决方案的时间复杂度是O(n),空间复杂度是O(n);如果切片未排序,时间复杂度是O(n*log(n)),空间复杂度是O(n),但它具有实际正确的属性。😃
func diff(a, b []string) []string {
a = sortIfNeeded(a)
b = sortIfNeeded(b)
var d []string
i, j := 0, 0
for i < len(a) && j < len(b) {
c := strings.Compare(a[i], b[j])
if c == 0 {
i++
j++
} else if c < 0 {
d = append(d, a[i])
i++
} else {
d = append(d, b[j])
j++
}
}
d = append(d, a[i:len(a)]...)
d = append(d, b[j:len(b)]...)
return d
}
func sortIfNeeded(a []string) []string {
if sort.StringsAreSorted(a) {
return a
}
s := append(a[:0:0], a...)
sort.Strings(s)
return s
}
如果你确定切片已经排序,可以删除对sortIfNeeded
的调用(在sortIfNeeded
中进行防御性切片复制的原因是因为排序是原地进行的,所以我们会修改传递给diff
的切片)。
请参阅https://play.golang.org/p/lH-5L0aL1qr,其中显示了面对重复条目时的正确性测试。
英文:
Most of the other solutions here will fail to return the correct answer in case the slices contain duplicated elements.
This solution is O(n) time and O(n) space if the slices are already sorted, and O(n*log(n)) time O(n) space if they are not, but has the nice property of actually being correct. 🤣
func diff(a, b []string) []string {
a = sortIfNeeded(a)
b = sortIfNeeded(b)
var d []string
i, j := 0, 0
for i < len(a) && j < len(b) {
c := strings.Compare(a[i], b[j])
if c == 0 {
i++
j++
} else if c < 0 {
d = append(d, a[i])
i++
} else {
d = append(d, b[j])
j++
}
}
d = append(d, a[i:len(a)]...)
d = append(d, b[j:len(b)]...)
return d
}
func sortIfNeeded(a []string) []string {
if sort.StringsAreSorted(a) {
return a
}
s := append(a[:0:0], a...)
sort.Strings(s)
return s
}
If you know for sure that the slices are already sorted, you can remove the calls to sortIfNeeded
(the reason for the defensive slice copy in sortIfNeeded
is because sorting is done in-place, so we would be modifying the slices that are passed to diff
).
See https://play.golang.org/p/lH-5L0aL1qr for tests showing correctness in face of duplicated entries.
答案8
得分: 1
我有这个例子,但它只适用于第一个数组中的元素在第二个数组中“不存在”的情况。
使用泛型:
type HandleDiff[T comparable] func(item1 T, item2 T) bool
func HandleDiffDefault[T comparable](val1 T, val2 T) bool {
return val1 == val2
}
func Diff[T comparable](items1 []T, items2 []T, callback HandleDiff[T]) []T {
acc := []T{}
for _, item1 := range items1 {
find := false
for _, item2 := range items2 {
if callback(item1, item2) {
find = true
break
}
}
if !find {
acc = append(acc, item1)
}
}
return acc
}
用法:
diff := Diff(items1, items2, HandleDiffDefault[string])
英文:
I have this example but it works only for the elements of the first array "not present" in the second array
with generics
type HandleDiff[T comparable] func(item1 T, item2 T) bool
func HandleDiffDefault[T comparable](val1 T, val2 T) bool {
return val1 == val2
}
func Diff[T comparable](items1 []T, items2 []T, callback HandleDiff[T]) []T {
acc := []T{}
for _, item1 := range items1 {
find := false
for _, item2 := range items2 {
if callback(item1, item2) {
find = true
break
}
}
if !find {
acc = append(acc, item1)
}
}
return acc
}
usage
diff := Diff(items1, items2, HandleDiffDefault[string])
答案9
得分: 0
我会为@peterwilliams97的解决方案添加一个小改动,以便我们可以忽略输入的顺序。
func difference(a, b []string) []string {
// 重新排序输入,
// 这样我们可以检查较长的切片而不是较短的切片
longer, shorter := a, b
if len(b) > len(a) {
longer, shorter = b, a
}
mb := make(map[string]struct{}, len(shorter))
for _, x := range shorter {
mb[x] = struct{}{}
}
var diff []string
for _, x := range longer {
if _, found := mb[x]; !found {
diff = append(diff, x)
}
}
return diff
}
英文:
I would add a small change to the solution by @peterwilliams97, so that we can ignore the order of the input.
func difference(a, b []string) []string {
// reorder the input,
// so that we can check the longer slice over the shorter one
longer, shorter := a, b
if len(b) > len(a) {
longer, shorter = b, a
}
mb := make(map[string]struct{}, len(shorter))
for _, x := range shorter {
mb[x] = struct{}{}
}
var diff []string
for _, x := range longer {
if _, found := mb[x]; !found {
diff = append(diff, x)
}
}
return diff
}
答案10
得分: 0
func difference(s1, s2 []string) []string {
combinedSlice := append(s1, s2...)
dm := make(map[string]int)
for _, v := range combinedSlice {
if _, ok := dm[v]; ok {
// 后面删除该元素,因为它在两个切片中都存在。
dm[v] += 1
continue
}
// 新的条目,添加到映射中!
dm[v] = 1
}
var retSlice []string
for k, v := range dm {
if v == 1 {
retSlice = append(retSlice, k)
}
}
return retSlice
}
// 如果我们需要从第一个切片中获取差异,请使用以下函数。
// 输出: [sweet]
func diff(s1, s2 []string) []string {
mp1 := make(map[string]bool)
for _, v := range s2 {
mp1[v] = true
}
var dif []string
for _, v1 := range s1 {
if _, ok := mp1[v1]; !ok {
dif = append(dif, v1)
}
}
return dif
}
英文:
Input: s1 = ["this", "apple", "is", "sweet"], s2 = ["this" "apple" "is" "sour"]
Output: ["sweet","sour"]
func difference(s1, s2 []string) []string {
combinedSlice := append(s1, s2...)
dm := make(map[string]int)
for _, v := range combinedSlice {
if _, ok := dm[v]; ok {
// remove element later as it exist in both slice.
dm[v] += 1
continue
}
// new entry, add in map!
dm[v] = 1
}
var retSlice []string
for k, v := range dm {
if v == 1 {
retSlice = append(retSlice, k)
}
}
return retSlice
}
// If we needs difference from first slice use below funcion.
// Output: [sweet]
func diff(s1, s2 []string) []string {
mp1 := make(map[string]bool)
for _, v := range s2 {
mp1[v] = true
}
var dif []string
for _, v1 := range s1 {
if _, ok := mp1[v1]; !ok {
dif = append(dif, v1)
}
}
return dif
}
答案11
得分: -1
下面的代码给出了字符串之间的绝对差异,不考虑顺序。空间复杂度为O(n),时间复杂度为O(n)。
// difference返回a中不在b中的元素
func difference(a, b string) string {
longest, shortest := longestString(&a, &b)
var builder strings.Builder
var mem = make(map[rune]bool)
for _, s := range longest {
mem[s] = true
}
for _, s := range shortest {
if _, ok := mem[s]; ok {
mem[s] = false
}
}
for k, v := range mem {
if v == true {
builder.WriteRune(k)
}
}
return builder.String()
}
func longestString(a *string, b *string) ([]rune, []rune) {
if len(*a) > len(*b) {
return []rune(*a), []rune(*b)
}
return []rune(*b), []rune(*a)
}
以上是代码的翻译结果。
英文:
The code below gives the absolute difference between strings regardless of the order. Space complexity O(n) and Time complexity O(n).
// difference returns the elements in a that aren't in b
func difference(a, b string) string {
longest, shortest := longestString(&a, &b)
var builder strings.Builder
var mem = make(map[rune]bool)
for _, s := range longest {
mem展开收缩 = true
}
for _, s := range shortest {
if _, ok := mem展开收缩; ok {
mem展开收缩 = false
}
}
for k, v := range mem {
if v == true {
builder.WriteRune(k)
}
}
return builder.String()
}
func longestString(a *string, b *string) ([]rune, []rune) {
if len(*a) > len(*b) {
return []rune(*a), []rune(*b)
}
return []rune(*b), []rune(*a)
}
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论