英文:
How to open a JSON inside a column, which has a variable structure on SQL Server or similar
问题
我有一个SQL Server上的表,我想从中提取一个列的信息,但在该列中,我有730行不同结构的JSON。过去,我使用了OPENJSON,但我的数据有一个结构化的JSON,它的工作方式如下:
SELECT t._id as IDENT, oj.id, oj.CreatedAt, oj.from_nome, oj.from_id, oj.observacao
FROM my.table as t
CROSS APPLY OPENJSON(t.observacoes)
WITH
(
id NVARCHAR(36) '$.id',
CreatedAt DATETIME2 '$.created_at',
from_nome NVARCHAR(50) '$.from.nome',
from_id NVARCHAR(50) '$.from.id',
observacao NVARCHAR(MAX) '$.observacao.texto'
) AS oj
但现在,每行的列内没有固定的结构,每个单元格都有不同的JSON结构,我将展示一个示例(这是该列的一行):
[
{
"created_at":"09/03/2021 16:50:52"
},
{
"tramite-yoeivxtq-parecer":{
"tramite-yoeivxtq-parecer-texto":"text",
"tramite-yoeivxtq-parecer-anexos":[
{
"name":"name",
"width":0,
"height":0,
"status":[
{
"status":true
}
],
"uploadedNow":true,
"size":3033168
}
]
},
"created_at":"26/04/2021 10:26:23",
"created_at_iso":{
"$date":"2021-04-26T13:26:23.168Z"
},
"from":{
"userId":"auth0|5f9856efc6b72c007100caec",
"name":"name"
}
},
{
"tramite-a4ovjmk0-parecer":{
"tramite-a4ovjmk0-parecer-texto":"text",
"tramite-a4ovjmk0-parecer-anexos":null
},
"created_at":"10/05/2021 16:10:26",
"created_at_iso":{
"$date":"2021-05-10T19:10:26.042Z"
},
"from":{
"userId":"auth0|5f9856efc6b72c007100caec",
"name":"xxx"
}
},
{
"tramite-e11cyklf-parecer":{
"tramite-e11cyklf-parecer-texto":"text",
"tramite-e11cyklf-parecer-anexos":null
},
"created_at":"12/05/2021 07:09:00",
"created_at_iso":{
"$date":"2021-05-12T10:09:00.403Z"
},
"from":{
"userId":"auth0|5e5808ba05a0dc0c17160d4b",
"name":"name"
}
}
]
我想要提取与以前的OPENJSON用法相同的内容:row_id, created_at, from_name, from_id, text
但是我无法进入这些JSON,因为它们没有像"$.tramite-yoeivxtq-parecer.tramite-yoeivxtq-parecer-texto"这样的固定结构,而是每个键都不同。
另外,我正在使用SQL Server 2022(RTM-GDR)(KB5021522)- 16.0.1050.5(X64)
PS:如果在SQL上不可行,希望能提供在Python上执行的方法。
英文:
I have this table on SQL Server from which I want to extract a column information, but inside that column I have 730 rows of different structured JSON. In the past, I used OPENJSON but my data had a structured JSON, it worked like this:
SELECT t._id as IDENT, oj.id, oj.CreatedAt, oj.from_nome, oj.from_id, oj.observacao
FROM my.table as t
CROSS APPLY OPENJSON(t.observacoes)
WITH
(
id NVARCHAR(36) '$.id',
CreatedAt DATETIME2 '$.created_at',
from_nome NVARCHAR(50) '$.from.nome',
from_id NVARCHAR(50) '$.from.id',
observacao NVARCHAR(MAX) '$.observacao.texto'
) AS oj
But now, I don't have a fixed structure inside each row of said column, each cell has a different JSON structure, I will show one example (which is one row of said column):
[
{
"created_at":"09/03/2021 16:50:52"
},
{
"tramite-yoeivxtq-parecer":{
"tramite-yoeivxtq-parecer-texto":"text",
"tramite-yoeivxtq-parecer-anexos":[
{
"name":"name",
"width":0,
"height":0,
"status":[
{
"status":true
}
],
"uploadedNow":true,
"size":3033168
}
]
},
"created_at":"26/04/2021 10:26:23",
"created_at_iso":{
"$date":"2021-04-26T13:26:23.168Z"
},
"from":{
"userId":"auth0|5f9856efc6b72c007100caec",
"name":"name"
}
},
{
"tramite-a4ovjmk0-parecer":{
"tramite-a4ovjmk0-parecer-texto":"text",
"tramite-a4ovjmk0-parecer-anexos":null
},
"created_at":"10/05/2021 16:10:26",
"created_at_iso":{
"$date":"2021-05-10T19:10:26.042Z"
},
"from":{
"userId":"auth0|5f9856efc6b72c007100caec",
"name":"xxx"
}
},
{
"tramite-e11cyklf-parecer":{
"tramite-e11cyklf-parecer-texto":"text",
"tramite-e11cyklf-parecer-anexos":null
},
"created_at":"12/05/2021 07:09:00",
"created_at_iso":{
"$date":"2021-05-12T10:09:00.403Z"
},
"from":{
"userId":"auth0|5e5808ba05a0dc0c17160d4b",
"name":"name"
}
}
]
I want to extract same as I did on later OPENJSON usage: row_id, created_at, from_name, from_id, text
But I can't get inside the JSON's because they do not have a solid structure like ".$.tramite-yoeivxtq-parecer.tramite-yoeivxtq-parecer-texto" and then the next one has a different key.
Also using SQL Server 2022 (RTM-GDR) (KB5021522) - 16.0.1050.5 (X64)
PS.: if it is not viable on SQL, a way to do it on Python would be appreciated
答案1
得分: 2
以下是翻译好的代码部分:
如果您的JSON结构非常动态,您可以使用递归公共表达式将JSON转换为如下表格格式:
WITH cte_recursive
AS
(
SELECT t.id, j1.[key] as 路径, j1.[key], j1.[value], j1.[type]
FROM #temp t
OUTER APPLY OPENJSON(t.JS) j1
UNION ALL
SELECT c.id, CONCAT(c.路径,'/', j2.[key]) as 路径, j2.[key], j2.[value], j2.[type]
FROM cte_recursive c
CROSS APPLY OPENJSON(c.[value]) j2
WHERE c.type IN (4, 5)
)
SELECT *
FROM cte_recursive c2
WHERE c2.[type] NOT IN (0, 4, 5);
英文:
If the structure of your JSON is very dynamic, you could use a recursive common table expression to convert your JSON in a tabular format like this:
WITH cte_recursive
AS
(
SELECT t.id, j1.[key] as Path,j1.[key],j1.[value],j1.[type]
FROM #temp t
OUTER APPLY OPENJSON(t.JS) j1
UNION ALL
SELECT c.id, CONCAT(c.Path,'/',j2.[key]) as Path,j2.[key],j2.[value],j2.[type]
FROM cte_recursive c
CROSS APPLY OPENJSON(c.[value]) j2
WHERE c.type IN (4,5)
)
SELECT *
FROM cte_recursive c2
WHERE c2.[type] NOT IN (0,4,5);
答案2
得分: 2
如果您正在使用SQL Server 2017,您可以通过一些嵌套的OPENJSON操作来获取动态值:
select x.[key] AS id
, MAX(CASE WHEN v.[key] = 'created_at' THEN v.value END) /*OVER(PARTITION BY x.[key])*/ AS created_at
, MAX(CASE WHEN v.[key] = 'from' then JSON_VALUE(v.value, '$.name') END) /*OVER(PARTITION BY x.[key]) */ AS from_nome
, MAX(CASE WHEN v.[key] = 'from' then JSON_VALUE(v.value, '$.userId') END) /*OVER(PARTITION BY x.[key]) */ AS from_id
, MAX(CASE WHEN v.[key] like '%tramite%' then JSON_VALUE(v.value, '$."' + v.[key] + '-texto"') END) /*OVER(PARTITION BY x.[key])*/ AS someText
--, *
from openjson(N'
[
{
"created_at":"09/03/2021 16:50:52"
},
{
"tramite-yoeivxtq-parecer":{
"tramite-yoeivxtq-parecer-texto":"text",
"tramite-yoeivxtq-parecer-anexos":[
{
"name":"name",
"width":0,
"height":0,
"status":[
{
"status":true
}
],
"uploadedNow":true,
"size":3033168
}
]
},
"created_at":"26/04/2021 10:26:23",
"created_at_iso":{
"$date":"2021-04-26T13:26:23.168Z"
},
"from":{
"userId":"auth0|5f9856efc6b72c007100caec",
"name":"name"
}
},
{
"tramite-a4ovjmk0-parecer":{
"tramite-a4ovjmk0-parecer-texto":"text",
"tramite-a4ovjmk0-parecer-anexos":null
},
"created_at":"10/05/2021 16:10:26",
"created_at_iso":{
"$date":"2021-05-10T19:10:26.042Z"
},
"from":{
"userId":"auth0|5f9856efc6b72c007100caec",
"name":"xxx"
}
},
{
"tramite-e11cyklf-parecer":{
"tramite-e11cyklf-parecer-texto":"text",
"tramite-e11cyklf-parecer-anexos":null
},
"created_at":"12/05/2021 07:09:00",
"created_at_iso":{
"$date":"2021-05-12T10:09:00.403Z"
},
"from":{
"userId":"auth0|5e5808ba05a0dc0c17160d4b",
"name":"name"
}
}
]') x
CROSS APPLY OPENJSON(x.value) v
where CAST(x.[key] AS INT) > 0
group by x.[key]
基本上,您首先使用openjson获取内容,然后将嵌套键拆分为独立的行,然后匹配您要查找的键,同时保留原始row_id作为将值分组到最终结果的方式。
要获取动态键名,您可以根据键ID构建键(这在SQL Server 2016及更早版本中是不可能的)。
出于调试目的,您可以取消注释分区并注释掉分组,只选择*以查看这些操作的工作原理。
英文:
If you're using SQL Server 2017 you could get the dynamic values by doing some nested OPENJSON magic:
select x.[key] AS id
, MAX(CASE WHEN v.[key] = 'created_at' THEN v.value END) /*OVER(PARTITION BY x.[key])*/ AS created_at
, MAX(CASE WHEN v.[key] = 'from' then JSON_VALUE(v.value, '$.name') END) /*OVER(PARTITION BY x.[key]) */ AS from_nome
, MAX(CASE WHEN v.[key] = 'from' then JSON_VALUE(v.value, '$.userId') END) /*OVER(PARTITION BY x.[key]) */ AS from_id
, MAX(CASE WHEN v.[key] like '%tramite%' then JSON_VALUE(v.value, '$."' + v.[key] + '-texto"') END) /*OVER(PARTITION BY x.[key])*/ AS someText
--, *
from openjson(N'
[
{
"created_at":"09/03/2021 16:50:52"
},
{
"tramite-yoeivxtq-parecer":{
"tramite-yoeivxtq-parecer-texto":"text",
"tramite-yoeivxtq-parecer-anexos":[
{
"name":"name",
"width":0,
"height":0,
"status":[
{
"status":true
}
],
"uploadedNow":true,
"size":3033168
}
]
},
"created_at":"26/04/2021 10:26:23",
"created_at_iso":{
"$date":"2021-04-26T13:26:23.168Z"
},
"from":{
"userId":"auth0|5f9856efc6b72c007100caec",
"name":"name"
}
},
{
"tramite-a4ovjmk0-parecer":{
"tramite-a4ovjmk0-parecer-texto":"text",
"tramite-a4ovjmk0-parecer-anexos":null
},
"created_at":"10/05/2021 16:10:26",
"created_at_iso":{
"$date":"2021-05-10T19:10:26.042Z"
},
"from":{
"userId":"auth0|5f9856efc6b72c007100caec",
"name":"xxx"
}
},
{
"tramite-e11cyklf-parecer":{
"tramite-e11cyklf-parecer-texto":"text",
"tramite-e11cyklf-parecer-anexos":null
},
"created_at":"12/05/2021 07:09:00",
"created_at_iso":{
"$date":"2021-05-12T10:09:00.403Z"
},
"from":{
"userId":"auth0|5e5808ba05a0dc0c17160d4b",
"name":"name"
}
}
]') x
CROSS APPLY OPENJSON(x.value) v
where CAST(x.[key] AS INT) > 0
group by x.[key]
Basically, you first openjson to get the contents and then shred down every key in the nested to own rows, and then match the keys you're looking for, while keeping the original row_id as a way to group the values together for the final result.
To get the dynamic key name, you build the key from the key id (this is not possible in SQL Server 2016 and earlier)
For debug purposes, you can uncomment the partition by and comment out group by and just select * to see how this stuff works.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论