英文:
Keeping a running list with a specific limit
问题
以下是要翻译的内容:
DECLARE _MAX_ACTIVE_ENROLLMENTS INT64 DEFAULT 2;
WITH `activity` AS (
SELECT "2022-01-01" AS `date_time`, "tim" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-02" AS `date_time`, "sarah" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-04" AS `date_time`, "tim" AS `username`, "extended" AS `activity` UNION ALL
SELECT "2022-01-05" AS `date_time`, "ed" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-06" AS `date_time`, "ed" AS `username`, "extended" AS `activity` UNION ALL
SELECT "2022-01-07" AS `date_time`, "tim" AS `username`, "canceled" AS `activity` UNION ALL
SELECT "2022-01-07" AS `date_time`, "tim" AS `username`, "canceled" AS `activity` UNION ALL
SELECT "2022-01-08" AS `date_time`, "ed" AS `username`, "canceled" AS `activity` UNION ALL
SELECT "2022-01-09" AS `date_time`, "lisa" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-10" AS `date_time`, "sarah" AS `username`, "canceled" AS `activity`
)
我会提供翻译后的内容,但不会提供额外的回答。以下是翻译好的内容:
DECLARE _MAX_ACTIVE_ENROLLMENTS INT64 DEFAULT 2;
WITH `activity` AS (
SELECT "2022-01-01" AS `date_time`, "tim" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-02" AS `date_time`, "sarah" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-04" AS `date_time`, "tim" AS `username`, "extended" AS `activity` UNION ALL
SELECT "2022-01-05" AS `date_time`, "ed" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-06" AS `date_time`, "ed" AS `username`, "extended" AS `activity` UNION ALL
SELECT "2022-01-07" AS `date_time`, "tim" AS `username`, "canceled" AS `activity` UNION ALL
SELECT "2022-01-07" AS `date_time`, "tim" AS `username`, "canceled" AS `activity` UNION ALL
SELECT "2022-01-08" AS `date_time`, "ed" AS `username`, "canceled" AS `activity` UNION ALL
SELECT "2022-01-09" AS `date_time`, "lisa" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-10" AS `date_time`, "sarah" AS `username`, "canceled" AS `activity`
)
英文:
I have the following sample dataset.
DECLARE _MAX_ACTIVE_ENROLLMENTS INT64 DEFAULT 2;
WITH `activity` AS (
SELECT "2022-01-01" AS `date_time`, "tim" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-02" AS `date_time`, "sarah" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-04" AS `date_time`, "tim" AS `username`, "extended" AS `activity` UNION ALL
SELECT "2022-01-05" AS `date_time`, "ed" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-06" AS `date_time`, "ed" AS `username`, "extended" AS `activity` UNION ALL
SELECT "2022-01-07" AS `date_time`, "tim" AS `username`, "canceled" AS `activity` UNION ALL
SELECT "2022-01-07" AS `date_time`, "tim" AS `username`, "canceled" AS `activity` UNION ALL
SELECT "2022-01-08" AS `date_time`, "ed" AS `username`, "canceled" AS `activity` UNION ALL
SELECT "2022-01-09" AS `date_time`, "lisa" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-10" AS `date_time`, "sarah" AS `username`, "canceled" AS `activity`
)
I would like to analyze this dataset based on a specific limit to the number of enrollments. So have an overview of which enrollments would have been invalid if the limit was lower than the number of people applying.
The thing that I'm struggling with in this challenge is that, on every row, you need to keep a running list of active_enrollments
that contains the usernames. You need to keep this list to know which "canceled" events you should process and which ones you can ignore. That's how I'm trying to solve the challenge, but maybe there is an entirely different way.
This is the outcome that I'm looking for.
-- _MAX_ACTIVE_ENROLLMENTS = 1
| date_time | username | activity | valid_enrollment |
|------------|----------|----------|------------------|
| 2022-01-01 | tim | enrolled | 1 |
| 2022-01-02 | sarah | enrolled | 0 |
| 2022-01-04 | tim | extended | 1 |
| 2022-01-05 | ed | enrolled | 0 |
| 2022-01-06 | ed | extended | 0 |
| 2022-01-07 | tim | canceled | 1 |
| 2022-01-07 | tim | canceled | 1 |
| 2022-01-08 | ed | canceled | 0 |
| 2022-01-09 | lisa | enrolled | 1 |
| 2022-01-10 | sarah | canceled | 0 |
-- _MAX_ACTIVE_ENROLLMENTS = 2
| date_time | username | activity | valid_enrollment |
|------------|----------|----------|------------------|
| 2022-01-01 | tim | enrolled | 1 |
| 2022-01-02 | sarah | enrolled | 1 |
| 2022-01-04 | tim | extended | 1 |
| 2022-01-05 | ed | enrolled | 0 |
| 2022-01-06 | ed | extended | 0 |
| 2022-01-07 | tim | canceled | 1 |
| 2022-01-07 | tim | canceled | 1 |
| 2022-01-08 | ed | canceled | 0 |
| 2022-01-09 | lisa | enrolled | 1 |
| 2022-01-10 | sarah | canceled | 1 |
Any help or pointers in the right direction are much appreciated!
答案1
得分: 1
I hope that below give you some direction to your problem.
DECLARE _MAX_ACTIVE_ENROLLMENTS INT64 DEFAULT 2;
WITH RECURSIVE `activity` AS (
-- put your sample data here
),
activities AS (
SELECT ROW_NUMBER() OVER (ORDER BY date_time) AS rn, * FROM activity
),
enrollments AS (
SELECT *,
IF (_MAX_ACTIVE_ENROLLMENTS > 0 AND activity = 'enrolled', [username], []) AS enrolled_users,
IF (_MAX_ACTIVE_ENROLLMENTS = 0 AND activity = 'enrolled', 0, 1) AS valid_enrollment
FROM activities WHERE rn = 1
UNION ALL
SELECT e.rn + 1, a.date_time, a.username, a.activity,
CASE
WHEN a.activity = 'enrolled' AND ARRAY_LENGTH(enrolled_users) < _MAX_ACTIVE_ENROLLMENTS THEN e.enrolled_users || [a.username]
WHEN a.activity = 'canceled' THEN ARRAY(SELECT u FROM UNNEST(e.enrolled_users) u WHERE u <> a.username)
ELSE e.enrolled_users
END AS enrolled_users,
CASE
WHEN a.activity = 'enrolled' AND ARRAY_LENGTH(enrolled_users) = _MAX_ACTIVE_ENROLLMENTS THEN 0
WHEN a.activity IN ('extended', 'canceled') AND a.username NOT IN UNNEST(enrolled_users) THEN 0
ELSE 1
END AS valid_enrollment
FROM enrollments e JOIN activities a ON e.rn + 1 = a.rn
)
SELECT date_time, username, activity, valid_enrollment FROM enrollments
ORDER BY rn;
Query results
英文:
I hope that below give you some direction to your problem.
DECLARE _MAX_ACTIVE_ENROLLMENTS INT64 DEFAULT 2;
WITH RECURSIVE `activity` AS (
-- put your sample data here
),
activities AS (
SELECT ROW_NUMBER() OVER (ORDER BY date_time) AS rn, * FROM activity
),
enrollments AS (
SELECT *,
IF (_MAX_ACTIVE_ENROLLMENTS > 0 AND activity = 'enrolled', [username], []) AS enrolled_users,
IF (_MAX_ACTIVE_ENROLLMENTS = 0 AND activity = 'enrolled', 0, 1) AS valid_enrollment
FROM activities WHERE rn = 1
UNION ALL
SELECT e.rn + 1, a.date_time, a.username, a.activity,
CASE
WHEN a.activity = 'enrolled' AND ARRAY_LENGTH(enrolled_users) < _MAX_ACTIVE_ENROLLMENTS THEN e.enrolled_users || [a.username]
WHEN a.activity = 'canceled' THEN ARRAY(SELECT u FROM UNNEST(e.enrolled_users) u WHERE u <> a.username)
ELSE e.enrolled_users
END AS enrolled_users,
CASE
WHEN a.activity = 'enrolled' AND ARRAY_LENGTH(enrolled_users) = _MAX_ACTIVE_ENROLLMENTS THEN 0
WHEN a.activity IN ('extended', 'canceled') AND a.username NOT IN UNNEST(enrolled_users) THEN 0
ELSE 1
END AS valid_enrollment
FROM enrollments e JOIN activities a ON e.rn + 1 = a.rn
)
SELECT date_time, username, activity, valid_enrollment FROM enrollments
ORDER BY rn;
Query results
Outdated answer
It will return enrolled usernames limited to the number of _MAX_ACTIVE_ENROLLMENTS
at a given datetime
, (though valid_enrollment
column in your outcome is not clear to me).
SELECT * EXCEPT (enrollments),
ARRAY (
SELECT username FROM (
SELECT DISTINCT username,
LAST_VALUE(IF(activity = 'enrolled', date_time, NULL) IGNORE NULLS) OVER w1 AS last_enrolled
FROM UNNEST(enrollments)
QUALIFY LAST_VALUE(activity) OVER w1 <> 'canceled'
WINDOW w1 AS (PARTITION BY username ORDER BY date_time
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
) ORDER BY last_enrolled LIMIT 2 --<-- _MAX_ACTIVE_ENROLLMENTS
) AS valid_enrollment
FROM (
SELECT *, ARRAY_AGG(STRUCT(username, t.activity, date_time)) OVER w0 AS enrollments
FROM `activity` t
WINDOW w0 AS (ORDER BY date_time)
);
答案2
得分: 0
这是要翻译的代码的翻译部分:
有需要生成一个包含所有报名人员的数组列表。在接下来的步骤中,将从该数组中删除“已取消”的人员。
表格“activity”如下:
- 选择“2021-01-05”作为“date_time”,“ed”作为“username”,“enrolled”作为“activity” UNION ALL
- 选择“2022-01-01”作为“date_time”,“tim”作为“username”,“enrolled”作为“activity” UNION ALL
- 选择“2022-01-02”作为“date_time”,“sarah”作为“username”,“enrolled”作为“activity” UNION ALL
- 选择“2022-01-04”作为“date_time”,“tim”作为“username”,“extended”作为“activity” UNION ALL
- 选择“2022-01-05”作为“date_time”,“ed”作为“username”,“enrolled”作为“activity” UNION ALL
- 选择“2022-01-06”作为“date_time”,“ed”作为“username”,“extended”作为“activity” UNION ALL
- 选择“2022-01-07”作为“date_time”,“tim”作为“username”,“canceled”作为“activity” UNION ALL
- 选择“2022-01-08”作为“date_time”,“ed”作为“username”,“canceled”作为“activity” UNION ALL
- 选择“2022-01-09”作为“date_time”,“lisa”作为“username”,“enrolled”作为“activity” UNION ALL
- 选择“2022-01-10”作为“date_time”,“sarah”作为“username”,“canceled”作为“activity”
表格“help”添加了两个新列“enroll”和“cancel”,如果为真,则将其设置为1。
表格“help1”生成了截止到该日期的所有报名人员的数组“arr”。如果某一行不是报名行为,我们添加字符串“NULL”,并在后续步骤中将其删除。
表格“help2”删除了已取消的人员。这是通过子查询完成的。
表格“help3”删除了“NULL”虚拟字符串以及多次申请的人员。还将数组设置为最大长度“_MAX_ACTIVE_ENROLLMENTS”。
最后,我们检查报名名单的更改“delta_enroll”。将此值与“delta_enroll_expected”进行比较,以检查报名/取消是否成功。
在help3表格中,按“date_time”排序,并计算“arr3”的数组长度(arr3_length),计算“arr3”的数组长度减去(如果有的话)前一行的数组长度(lag(arr3) over (order by date_time)),以及计算“enroll”减去“cancel”的差值(delta_enroll_expected)。
英文:
There is the need to generate an array as a list with all enroll people. In the next steps the "canceled" people are removed from that array.
DECLARE _MAX_ACTIVE_ENROLLMENTS INT64 DEFAULT 2;
WITH activity AS (
# SELECT "2021-01-05" AS `date_time`, "ed" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-01" AS `date_time`, "tim" AS `username`, "enrolled" AS activity UNION ALL
SELECT "2022-01-02" AS `date_time`, "sarah" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-04" AS `date_time`, "tim" AS `username`, "extended" AS `activity` UNION ALL
SELECT "2022-01-05" AS `date_time`, "ed" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-06" AS `date_time`, "ed" AS `username`, "extended" AS `activity` UNION ALL
SELECT "2022-01-07" AS `date_time`, "tim" AS `username`, "canceled" AS `activity` UNION ALL
SELECT "2022-01-08" AS `date_time`, "ed" AS `username`, "canceled" AS `activity` UNION ALL
SELECT "2022-01-09" AS `date_time`, "lisa" AS `username`, "enrolled" AS `activity` UNION ALL
SELECT "2022-01-10" AS `date_time`, "sarah" AS `username`, "canceled" AS `activity`
),
help as (select *,
if(activity.activity="enrolled",1,0) as enroll,
if(activity.activity="canceled",1,0) as cancel,
from activity ),
help1 as (select *,
array_agg(if(enroll=1,username,"NULL")) over win as arr1
from help
window win as (order by date_time range between unbounded preceding and current row)
),
help2 as (select *,
(Select array_agg(x order by offset) from (select x,offset from unnest(arr1) x with offset where cancel!=1 or x!=username)) as arr2
from help1) ,
help3 as (select *,
(Select array_agg(x order by offset) from
(Select x,offset from
(select x,min(offset) offset from unnest(arr2) x with offset where x!="NULL" group by x)
qualify row_number() over (order by offset)<= _MAX_ACTIVE_ENROLLMENTS)
) as arr3
from help2)
Select *,
array_length(arr3) as arr3_length,
array_length(arr3)-ifnull((array_length(lag(arr3) over (order by date_time))),0) as delta_enroll,
enroll-cancel as delta_enroll_expected
from
help3
order by date_time
The table help
adds two new columns enroll
and cancel
, each set to one, if true.
The table help1
generates an array arr
of all enrolled people to that date. If in a row not an enroll happens, we add the string "NULL"
and delete these in the further steps.
The table help2
removes the people who canceled. This is done by a sub-select-query.
The table3
removes the "NULL"
dummy string and if there are people applying several times. Also the array is set to the maximum length _MAX_ACTIVE_ENROLLMENTS
.
Finally we check for the change of the enrollment list delta_enroll
. If comparing this value with the delta_enroll_expected
the enrollment/canceling can be check for successfulness.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论