英文:
Matching array elements from two tables and returning results in a new field with distinct values sharing at least one value using clickhouse
问题
你好,我有两个表,它们共享两个字段,'path'(单个字符串字段)和'arrKW'(字符串数组)。
示例表1:
create table test (path String, arrKW Array(String)) Engine=Memory as
select * from values (('folder/puntonet',['kw1','kw2']),
('folder/puntonet-2.0',['kw2','kw3']),
('folder/puntonet-4',['kw2','kw4']),
('folder/puntonet-5',['kw5','kw4']));
表1内容如下:
| path | arrKW |
+---------------------+---------------+
| folder/puntonet | ['kw1','kw2'] |
| folder/puntonet-2.0 | ['kw2','kw3'] |
| folder/puntonet-4 | ['kw2','kw4'] |
| folder/puntonet-5 | ['kw5','kw4'] |
示例表2:
create table test2 (path String, arrKW Array(String)) Engine=Memory as
select * from values (('folder/otherpuntonet',['kw1','kw2']),
('folder/otherpuntonet-2.0',['kw2','kw77']),
('folder/otherpuntonet-4',['kw2','kw77']),
('folder/puntonet-5',['kw5','kw4']));
表2内容如下:
| path | arrKW |
+--------------------------+-----------------+
| folder/otherpuntonet | ['kw1','kw2'] |
| folder/otherpuntonet-2.0 | ['kw2','kw77'] |
| folder/otherpuntonet-4 | ['kw2','kw77'] |
| folder/puntonet-5 | ['kw5','kw4'] |
期望的结果:
| path | arrKW | arrResult (来自表2) |
+----------------------+---------------+---------------------------------------------------------------------------------------------+
| folder/otherpuntonet | ['kw1','kw2'] | ['folder/otherpuntonet-2.0','folder/otherpuntonet-4'] |
| folder/puntonet-2.0 | ['kw2','kw3'] | ['folder/otherpuntonet','folder/otherpuntonet-2.0','folder/otherpuntonet-4'] |
| folder/puntonet-4 | ['kw2','kw4'] | ['folder/otherpuntonet','folder/otherpuntonet-2.0','folder/otherpuntonet-4','folder/puntonet-5'] |
| folder/puntonet-5 | ['kw5','kw4'] | ['folder/otherpuntonet-4'] |
英文:
Hello I've two tables that share two fields, 'path' (single String field) and 'arrKW' (Array(String))
Exaple Table1
create table test (path String, arrKW Array(String))Engine=Memory as
select * from values (('folder/puntonet',['kw1','kw2']),
('folder/puntonet-2.0',['kw2','kw3']),
('folder/puntonet-4',['kw2','kw4']),
('folder/puntonet-5',['kw5','kw4']));
| path | arrKW |
+---------------------+---------------+
| folder/puntonet | ['kw1','kw2'] |
| folder/puntonet-2.0 | ['kw2','kw3'] |
| folder/puntonet-4 | ['kw2','kw4'] |
| folder/puntonet-5 | ['kw5','kw4'] |
Exaple Table2
create table test2 (path String, arrKW Array(String))Engine=Memory as
select * from values (('folder/otherpuntonet',['kw1','kw2']),
('folder/otherpuntonet-2.0',['kw2','kw77']),
('folder/otherpuntonet-4',['kw2','kw77']),
('folder/puntonet-5',['kw5','kw4']))
| path | arrKW |
+--------------------------+----------------+
| folder/otherpuntonet | ['kw1','kw2'] |
| folder/otherpuntonet-2.0 | ['kw2','kw77'] |
| folder/otherpuntonet-4 | ['kw2','kw77'] |
| folder/puntonet-5 | ['kw5','kw4'] |
Result desired
| path | arrKW | arrResult (from table2) |
+----------------------+---------------+--------------------------------------------------------------------------------------------------+
| folder/otherpuntonet | ['kw1','kw2'] | ['folder/otherpuntonet-2.0','folder/otherpuntonet-4'] |
| folder/puntonet-2.0 | ['kw2','kw3'] | ['folder/otherpuntonet','folder/otherpuntonet-2.0','folder/otherpuntonet-4'] |
| folder/puntonet-4 | ['kw2','kw4'] | ['folder/otherpuntonet','folder/otherpuntonet-2.0','folder/otherpuntonet-4','folder/puntonet-5'] |
| folder/puntonet-5 | ['kw5','kw4'] | ['folder/otherpuntonet-4'] |
答案1
得分: 1
以下是翻译好的内容:
原始代码:
task is very unclear
create table test (path String, arrKW Array(String))Engine=Memory as
select * from values (('folder/puntonet',['kw1','kw2']),
('folder/puntonet-2.0',['kw2','kw3']),
('folder/puntonet-4',['kw2','kw4']),
('folder/puntonet-5',['kw5','kw4']));
create table test2 (path String, arrKW Array(String))Engine=Memory as
select * from values (('folder/otherpuntonet',['kw1','kw2']),
('folder/otherpuntonet-2.0',['kw2','kw77']),
('folder/otherpuntonet-4',['kw2','kw77']),
('folder/otherpuntonet-5',['kw5','kw4']))
SELECT
(arrayJoin(a) AS t).2 AS path,
t.1 AS KW,
t.3 as tab,
arraySort(groupUniqArrayArray(patha)) AS result
FROM
(
SELECT
groupArray(pathx) AS patha,
groupArray(t) AS a,
arrayJoin(arrKW) AS KW
FROM (select 'test' tab, null pathx, arrKW, (arrKW, path, tab) t from test union all
select 'test2' tab, path pathx, arrKW, (arrKW, path, tab) t from test2)
GROUP BY KW
)
GROUP BY t
having tab = 'test'
ORDER BY path ASC
改进后的代码:
SELECT
(arrayJoin(a) AS t).2 AS path,
t.1 AS KW,
arraySort(groupUniqArrayArray(patha)) AS result
FROM
(
SELECT
groupArray(pathx) AS patha,
groupArray(t) AS a,
arrayJoin(arrKW) AS KW
FROM (select null pathx, arrKW, (arrKW, path) t from test union all
select path pathx, arrKW, ([], null) t from test2)
GROUP BY KW
)
WHERE path is not null
GROUP BY t
ORDER BY path ASC
希望这对你有帮助。
英文:
task is very unclear
create table test (path String, arrKW Array(String))Engine=Memory as
select * from values (('folder/puntonet',['kw1','kw2']),
('folder/puntonet-2.0',['kw2','kw3']),
('folder/puntonet-4',['kw2','kw4']),
('folder/puntonet-5',['kw5','kw4']));
create table test2 (path String, arrKW Array(String))Engine=Memory as
select * from values (('folder/otherpuntonet',['kw1','kw2']),
('folder/otherpuntonet-2.0',['kw2','kw77']),
('folder/otherpuntonet-4',['kw2','kw77']),
('folder/otherpuntonet-5',['kw5','kw4']))
SELECT
(arrayJoin(a) AS t).2 AS path,
t.1 AS KW,
t.3 as tab,
arraySort(groupUniqArrayArray(patha)) AS result
FROM
(
SELECT
groupArray(pathx) AS patha,
groupArray(t) AS a,
arrayJoin(arrKW) AS KW
FROM (select 'test' tab, null pathx, arrKW, (arrKW, path, tab) t from test union all
select 'test2' tab, path pathx, arrKW, (arrKW, path, tab) t from test2)
GROUP BY KW
)
GROUP BY t
having tab = 'test'
ORDER BY path ASC
┌─path────────────────┬─KW────────────┬─tab──┬─result────────────────────────────────────────────────────────────────────────────────────────────────┐
│ folder/puntonet │ ['kw1','kw2'] │ test │ ['folder/otherpuntonet','folder/otherpuntonet-2.0','folder/otherpuntonet-4'] │
│ folder/puntonet-2.0 │ ['kw2','kw3'] │ test │ ['folder/otherpuntonet','folder/otherpuntonet-2.0','folder/otherpuntonet-4'] │
│ folder/puntonet-4 │ ['kw2','kw4'] │ test │ ['folder/otherpuntonet','folder/otherpuntonet-2.0','folder/otherpuntonet-4','folder/otherpuntonet-5'] │
│ folder/puntonet-5 │ ['kw5','kw4'] │ test │ ['folder/otherpuntonet-5'] │
└─────────────────────┴───────────────┴──────┴───────────────────────────────────────────────────────────────────────────────────────────────────────┘
probably it can be improved
SELECT
(arrayJoin(a) AS t).2 AS path,
t.1 AS KW,
arraySort(groupUniqArrayArray(patha)) AS result
FROM
(
SELECT
groupArray(pathx) AS patha,
groupArray(t) AS a,
arrayJoin(arrKW) AS KW
FROM (select null pathx, arrKW, (arrKW, path) t from test union all
select path pathx, arrKW, ([], null) t from test2)
GROUP BY KW
)
WHERE path is not null
GROUP BY t
ORDER BY path ASC
┌─path────────────────┬─KW────────────┬─result────────────────────────────────────────────────────────────────────────────────────────────────┐
│ folder/puntonet │ ['kw1','kw2'] │ ['folder/otherpuntonet','folder/otherpuntonet-2.0','folder/otherpuntonet-4'] │
│ folder/puntonet-2.0 │ ['kw2','kw3'] │ ['folder/otherpuntonet','folder/otherpuntonet-2.0','folder/otherpuntonet-4'] │
│ folder/puntonet-4 │ ['kw2','kw4'] │ ['folder/otherpuntonet','folder/otherpuntonet-2.0','folder/otherpuntonet-4','folder/otherpuntonet-5'] │
│ folder/puntonet-5 │ ['kw5','kw4'] │ ['folder/otherpuntonet-5'] │
└─────────────────────┴───────────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────┘
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论