2023年5月13日 11:49:33go评论103阅读模式

英文:

Transform and filter array of structs with parent struct field name

问题

以下是您要翻译的内容：

I am trying to go one step further than this StackOverflow post (https://stackoverflow.com/questions/74299990/convert-struct-of-structs-to-array-of-structs-pulling-struct-field-name-inside): I need to pull the struct field name, filter each array of structs by its role value, and transform each struct element into a new struct that also carries the extracted struct field name.

Input:

 |-- a: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- struct_key: string (nullable = true)
 |    |    |-- two: array (nullable = true)
 |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |-- name: string (nullable = true)
 |    |    |    |    |-- role: string (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- struct_key: string (nullable = true)
 |    |    |-- two: array (nullable = true)
 |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |-- name: string (nullable = true)
 |    |    |    |    |-- role: string (nullable = true)

{
	"a": [{
			"two": [{
				"name": "person1",
				"role": "role1"
			},
			{
				"name": "person2",
				"role": "role1"
			},
			{
				"name": "person3",
				"role": "role2"
			}],
			"struct_key": "test1"
		},
		{
			"two": [{
				"name": "person4",
				"role": "role1"
			},
			{
				"name": "person5",
				"role": "role1"
			},
			{
				"name": "person6",
				"role": "role2"
			}],
			"struct_key": "test2"
		}
	]
}

# Sample document: "a" is an array of structs; each struct has a "struct_key"
# string and a nested array "two" of {name, role} structs.
input = {
    "a": [
        {
            "two": [
                {"name": "person1", "role": "role1"},
                {"name": "person2", "role": "role1"},
                {"name": "person3", "role": "role2"},
            ],
            "struct_key": "test1",
        },
        {
            "two": [
                {"name": "person4", "role": "role1"},
                {"name": "person5", "role": "role1"},
                {"name": "person6", "role": "role2"},
            ],
            "struct_key": "test2",
        },
    ]
}
# Load the single sample record, then print the schema obtained after
# applying inline() to the top-level array column "a".
df = spark.read.json(sc.parallelize([input]))
print(df.selectExpr('inline(a)').schema)

Expected output after filtering (for roles) and new struct transformation:

 |-- role_output: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- struct_key: string (nullable = true)
 |    |    |-- name: string (nullable = true)

{
	"role1_output": [
		{
			"struct_key": "test1",
			"name": "person1"
		}, 
		{
			"struct_key": "test1",
			"name": "person2"
		},
		{
			"struct_key": "test2",
			"name": "person4"
		},
		{
			"struct_key": "test2",
			"name": "person5"
		}
	]
}
{
	"role2_output": [
		{
			"struct_key": "test1",
			"name": "person3"
		}, 
		{
			"struct_key": "test2",
			"name": "person6"
		}
	]
}

我已经为您提供了所需的翻译部分，没有包括其他内容。如果您需要更多帮助，请随时告诉我。

英文:

Input:

 |-- a: array (nullable = true)
|    |-- element: struct (containsNull = true)
|    |    |-- struct_key: string (nullable = true)
|    |    |-- two: array (nullable = true)
|    |    |    |-- element: struct (containsNull = true)
|    |    |    |    |-- name: string (nullable = true)
|    |    |    |    |-- role: string (nullable = true)
|    |-- element: struct (containsNull = true)
|    |    |-- struct_key: string (nullable = true)
|    |    |-- two: array (nullable = true)
|    |    |    |-- element: struct (containsNull = true)
|    |    |    |    |-- name: string (nullable = true)
|    |    |    |    |-- role: string (nullable = true)

{
	"a": [{
			"two": [{
				"name": "person1",
				"role": "role1"
			},
			{
				"name": "person2",
				"role": "role1"
			},
			{
				"name": "person3",
				"role": "role2"
			}],
			"struct_key": "test1"
		},
		{
			"two": [{
				"name": "person4",
				"role": "role1"
			},
			{
				"name": "person5",
				"role": "role1"
			},
			{
				"name": "person6",
				"role": "role2"
			}],
			"struct_key": "test2"
		}
	]
}

# Sample document: "a" is an array of structs; each struct has a "struct_key"
# string and a nested array "two" of {name, role} structs.
input = {
    "a": [{
            "two": [{
                    "name": "person1",
                    "role": "role1"
                },
                {
                    "name": "person2",
                    "role": "role1"
                },
                {
                    "name": "person3",
                    "role": "role2"
                }
            ],
            "struct_key": "test1"
        },
        {
            "two": [{
                    "name": "person4",
                    "role": "role1"
                },
                {
                    "name": "person5",
                    "role": "role1"
                },
                {
                    "name": "person6",
                    "role": "role2"
                }
            ],
            "struct_key": "test2"
        }
    ]
}
# Load the single sample record, then print the schema obtained after
# applying inline() to the top-level array column "a".
df = spark.read.json(sc.parallelize([input]))
print(df.selectExpr('inline(a)').schema)

Expected output after filtering (for roles) and new struct transformation:

 |-- role_output: array (nullable = true)
|    |-- element: struct (containsNull = true)
|    |    |-- struct_key: string (nullable = true)
|    |    |-- name: string (nullable = true)

{
	"role1_output": [
		{
			"struct_key": "test1",
			"name": "person1"
		},
		{
			"struct_key": "test1",
			"name": "person2"
		},
		{
			"struct_key": "test2",
			"name": "person4"
		},
		{
			"struct_key": "test2",
			"name": "person5"
		}
	]
}
{
	"role2_output": [
		{
			"struct_key": "test1",
			"name": "person3"
		},
		{
			"struct_key": "test2",
			"name": "person6"
		}
	]
}

I have tried the struct-to-map type conversion from that StackOverflow answer, but I cannot figure out how to combine the extracted struct_key with the field values from the nested array of structs to create a new struct: as soon as I start transforming the array elements, I lose the struct_key value. Any advice?

答案1

得分: 0

见下文：

from pyspark.sql.functions import explode
from pyspark.sql.functions import array
from pyspark.sql.functions import struct
from pyspark.sql.functions import collect_list
# Sample document: "a" is an array of structs; each struct has a "struct_key"
# string and a nested array "two" of {name, role} structs.
# The variable must be named `input` because the pipeline below reads `input`;
# under any other name, the builtin input() function would be passed to
# sc.parallelize instead of this dict.
input = {
    "a": [{
        "two": [{
            "name": "person1",
            "role": "role1"
        },
        {
            "name": "person2",
            "role": "role1"
        },
        {
            "name": "person3",
            "role": "role2"
        }
        ],
        "struct_key": "test1"
    },
    {
        "two": [{
            "name": "person4",
            "role": "role1"
        },
        {
            "name": "person5",
            "role": "role1"
        },
        {
            "name": "person6",
            "role": "role2"
        }
        ],
        "struct_key": "test2"
    }
    ]
}
# Pipeline:
#   1. inline(a) is applied to the top-level array "a"; the next step selects
#      the resulting struct_key and two columns.
#   2. explode('two') is selected next to struct_key; its output is addressed
#      below as the column `col`.
#   3. Group by the role of each exploded element and collect
#      (name, struct_key) structs into one list per role.
df = (
    spark.read.json(sc.parallelize([input]))
    .selectExpr('inline(a)')
    .select('struct_key', explode('two'))
    .groupBy('col.role')
    .agg(collect_list(struct('col.name', 'struct_key')))
)
df.show(truncate=False)
df.printSchema()

英文:

See below:

from pyspark.sql.functions import explode
from pyspark.sql.functions import array
from pyspark.sql.functions import struct
from pyspark.sql.functions import collect_list
# Sample document: "a" is an array of structs; each struct has a "struct_key"
# string and a nested array "two" of {name, role} structs.
input = {
    "a": [{
            "two": [{
                    "name": "person1",
                    "role": "role1"
                },
                {
                    "name": "person2",
                    "role": "role1"
                },
                {
                    "name": "person3",
                    "role": "role2"
                }
            ],
            "struct_key": "test1"
        },
        {
            "two": [{
                    "name": "person4",
                    "role": "role1"
                },
                {
                    "name": "person5",
                    "role": "role1"
                },
                {
                    "name": "person6",
                    "role": "role2"
                }
            ],
            "struct_key": "test2"
        }
    ]
}
# inline(a) is applied to the top-level array, explode('two') is selected next
# to struct_key (its output is addressed as `col`), and the rows are then
# grouped by role, collecting (name, struct_key) structs per role.
df = spark.read.json(sc.parallelize([input])).selectExpr('inline(a)').select('struct_key', explode('two')).groupBy('col.role').agg(collect_list(struct('col.name', 'struct_key')))
df.show(truncate=False)
df.printSchema()

Gives you:

>>> df.show(truncate=False)
+-----+------------------------------------------------------------------------+
|role |collect_list(struct(col.name, struct_key))                              |
+-----+------------------------------------------------------------------------+
|role2|[{person3, test1}, {person6, test2}]                                    |
|role1|[{person1, test1}, {person2, test1}, {person4, test2}, {person5, test2}]|
+-----+------------------------------------------------------------------------+
>>> df.printSchema()
root
|-- role: string (nullable = true)
|-- collect_list(struct(col.name, struct_key)): array (nullable = false)
|    |-- element: struct (containsNull = false)
|    |    |-- name: string (nullable = true)
|    |    |-- struct_key: string (nullable = true)

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

Transform and filter array of structs with parent struct field name.

问题

答案1

正确的初始化空切片的方法是什么？

在PHP中向多维数组添加键和值。

"cannot take the address of" and "cannot call pointer method on"

字节数组通过输入流

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。