
huangapple go评论128阅读模式

MongoDB aggregation to group by multiple fields



  1. 我在我的集合中有以下数据
  2. {
  3.   "name": "test",
  4.   "data": {
  5.     "statusOne": "enabled",
  6.     "statusTwo": "active"
  7.   }
  8. }
  9. {
  10.   "name": "test",
  11.   "data": {
  12.     "statusOne": "disabled",
  13.     "statusTwo": "active"
  14.   }
  15. }
  16. {
  17.   "name": "another-test",
  18.   "data": {
  19.     "statusOne": "disabled",
  20.     "statusTwo": "active"
  21.   }
  22. }


  1. "output": [
  2. {
  3. "name": "test",
  4. "data": [
  5. {
  6. "status": "active",
  7. "count": 2
  8. },
  9. {
  10. "status": "disabled",
  11. "count": 1
  12. },
  13. {
  14. "status": "enabled",
  15. "count": 1
  16. }
  17. ]
  18. },
  19. {
  20. "name": "another-test",
  21. "data": [
  22. {
  23. "status": "active",
  24. "count": 1
  25. },
  26. {
  27. "status": "disabled",
  28. "count": 1
  29. }
  30. ]
  31. }
  32. ]


  1. [
  2. {
  3. $group: {
  4. _id: {
  5. appName: "$name",
  6. cs: "$data.statusOne",
  7. ps: "$data.statusTwo",
  8. },
  9. total: {
  10. $sum: 1,
  11. },
  12. },
  13. },
  14. {
  15. $group: {
  16. _id: "$_id.appName",
  17. total: { $sum: "$total" },
  18. ps: {
  19. $addToSet: {
  20. name: "$_id.ps",
  21. count: "$total",
  22. },
  23. },
  24. cs: {
  25. $addToSet: {
  26. name: "$_id.cs",
  27. count: "$total",
  28. },
  29. },
  30. },
  31. },
  32. {
  33. $project: {
  34. _id: 0,
  35. appName: "$_id",
  36. items: {
  37. $concatArrays: ["$ps", "$cs"],
  38. },
  39. },
  40. },
  41. ]



I have the following data in my collection:

  1. {
  2. "name": "test",
  3. "data": {
  4. "statusOne": "enabled",
  5. "statusTwo": "active"
  6. }
  7. }
  8. {
  9. "name": "test",
  10. "data": {
  11. "statusOne": "disabled",
  12. "statusTwo": "active"
  13. }
  14. }
  15. {
  16. "name": "another-test",
  17. "data": {
  18. "statusOne": "disabled",
  19. "statusTwo": "active"
  20. }
  21. }

How can I write an aggregation query that displays the data as shown below? I need to group by name, statusOne, and statusTwo: first segregate the results by the name field, then count the occurrences of each statusOne and statusTwo value. Both sets of counts need to go into the same output field, "data":

  1. "output": [
  2. {
  3. "name": "test",
  4. "data": [
  5. {
  6. "status": "active",
  7. "count": 2
  8. },
  9. {
  10. "status": "disabled",
  11. "count": 1
  12. },
  13. {
  14. "status": "enabled",
  15. "count": 1
  16. }
  17. ]
  18. },
  19. {
  20. "name": "another-test",
  21. "data": [
  22. {
  23. "status": "active",
  24. "count": 1
  25. },
  26. {
  27. "status": "disabled",
  28. "count": 1
  29. }
  30. ]
  31. }
  32. ]

I tried to use a sequence of $group stages as mentioned here, but had no luck:

  1. [
  2. {
  3. $group: {
  4. _id: {
  5. appName: "$name",
  6. cs: "$data.statusOne",
  7. ps: "$data.statusTwo",
  8. },
  9. total: {
  10. $sum: 1,
  11. },
  12. },
  13. },
  14. {
  15. $group: {
  16. _id: "$_id.appName",
  17. total: { $sum: "$total" },
  18. ps: {
  19. $addToSet: {
  20. name: "$_id.ps",
  21. count: "$total",
  22. },
  23. },
  24. cs: {
  25. $addToSet: {
  26. name: "$_id.cs",
  27. count: "$total",
  28. },
  29. },
  30. },
  31. },
  32. {
  33. $project: {
  34. _id: 0,
  35. appName: "$_id",
  36. items: {
  37. $concatArrays: ["$ps", "$cs"],
  38. },
  39. },
  40. },
  41. ]


得分: 2

  1. db.collection('表名').aggregate([
  2. {
  3. $group: {
  4. _id: { name: "$name", status: "$data.statusOne" },
  5. count: { $sum: 1 }
  6. }
  7. },
  8. {
  9. $group: {
  10. _id: "$_id.name",
  11. data: {
  12. $push: {
  13. status: "$_id.status",
  14. count: "$count"
  15. }
  16. }
  17. }
  18. },
  19. {
  20. $project: {
  21. _id: 0,
  22. name: "$_id",
  23. data: 1
  24. }
  25. }
  26. ])
  1. db.collection('tablename').aggregate([
  2. {
  3. $group: {
  4. _id: { name: "$name", status: "$data.statusOne" },
  5. count: { $sum: 1 }
  6. }
  7. },
  8. {
  9. $group: {
  10. _id: "$_id.name",
  11. data: {
  12. $push: {
  13. status: "$_id.status",
  14. count: "$count"
  15. }
  16. }
  17. }
  18. },
  19. {
  20. $project: {
  21. _id: 0,
  22. name: "$_id",
  23. data: 1
  24. }
  25. }
  26. ])

Use this and change the collection name.


得分: 1


  1. db.foo.aggregate([
  2. {$project: {
  3. name: true,
  4. /*
  5. 我们可以通过$objectToArray的输出进行更高级的操作,例如仅获取statusOne和statusTwo:
  7. Z: {$filter: {
  8. input: {$objectToArray: "$data"},
  9. cond: {$in: ['$$this.k', ['statusOne','statusTwo']]}
  10. }}
  12. 或者仅查找以'status'开头的键:
  14. Z: {$filter: {
  15. input: {$objectToArray: "$data"},
  16. cond: {$eq: ['status', {$substr:['$$this.k',0,6]}]}
  17. }}
  19. 现在让我们简单点,获取整个内容而不进行过滤。
  20. */
  21. Z: {$objectToArray: "$data"}
  22. }}
  23. ,{$unwind: "$Z"}
  24. // 现在我们有了这样的文档:
  25. //{  
  26. // _id: ObjectId("64d651283d8bc34d3928366d"),
  27. // name: 'another-test',
  28. // Z: {
  29. // k: 'statusOne',
  30. // v: 'disabled'
  31. // }
  32. //}  
  33. // 现在只是分组和重新组织的问题:
  34. ,{$group: {_id: {"name":"$name", "v":"$Z.v"}, N:{$sum:1}}}
  35. ,{$group: {_id: "$_id.name", data: {$push: {status:"$_id.v", count:"$N"}} }}
  36. ]);


  1. {
  2. _id: 'test',
  3. data: [
  4. {
  5. status: 'active',
  6. count: 2
  7. },
  8. {
  9. status: 'enabled',
  10. count: 1
  11. },
  12. {
  13. status: 'disabled',
  14. count: 1
  15. }
  16. ]
  17. }
  18. {
  19. _id: 'another-test',
  20. data: [
  21. {
  22. status: 'disabled',
  23. count: 1
  24. },
  25. {
  26. status: 'active',
  27. count: 1
  28. }
  29. ]
  30. }



  1. db.foo.aggregate([
  2. {$project: {
  3. name: true,
  4. Z: {$objectToArray: "$data"}
  5. }}
  6. /*
  7. 不使用$unwind并使用$group-$sum来计数,让我们自己来做。
  8. 我们将使用新的Z覆盖旧的Z。
  9. 此外,由于这是一个中间步骤,使用较短的变量名称以增加清晰度。
  10. 这个if-then-else构造基本上是这样的:
  11. 对于每种状态类型,status_type_count += 1
  12. 这允许输入具有相同类型的多个状态,例如:
  13. "name": "test",
  14. "data": {
  15. "statusOne": "enabled",
  16. "statusTwo": "enabled"
  17. }
  18. 在MQL中,$reduce循环中,我们不说
  19. object.key = object.key + 1
  20. 而是说:
  21. {$mergeObjects: [ "$$value", {key: {$add:["$$value.key",1]}} ]}
  22. */
  23. ,{$addFields: {Z: {$reduce: {
  24. input: "$Z",
  25. initialValue: {"A":0,"E":0,"D":0},
  26. in: {$cond: {
  27. if: {$eq:["$$this.v","active"]},
  28. then: {$mergeObjects: [ "$$value", {"A": {$add:["$$value.A",1]}} ]},
  29. else: {$cond: {
  30. if: {$eq:["$$this.v","disabled"]},
  31. then: {$mergeObjects: [ "$$value", {"D": {$add:["$$value.D",1]}} ]},
  32. else: {$cond: {
  33. if: {$eq:["$$this.v","enabled"]},
  34. then: {$mergeObjects: [ "$$value", {"E": {$add:["$$value.E",1]}} ]},
  35. else: "$$value"
  36. }}
  37. }}
  38. }}
  39. }}
  40. }}
  41. // 将名称组合在一起并收集计数:
  42. ,{$group: {
  43. _id: "$name", X: {$push: "$Z"}
  44. }}
  45. // 现在,再次运行$reduce以汇总计数并恢复大变量名称:
  46. ,{$project: {data: {$reduce: {
  47. input: "$X",
  48. initialValue: {"active":0,"enabled":0,"disabled":0},
  49. in: {"active": {$add:["$$value.active","$$this.A"]},
  50. "disabled": {$add:["$$value.disabled","$$this.D"]},
  51. "enabled": {$add:["$$value.enabled","$$this.E"]}
  52. }
  53. }}
  54. }}
  55. // 此时,我们“完成”了
  56. <details>
  57. <summary>英文:</summary>
  58. Here is a generalized solution:
  59. ```javascript
  60. db.foo.aggregate([
  61. {$project: {
  62. name: true,
  63. /*
  64. We could get fancy with the output of $objectToArray, like
  65. only going after statusOne and statusTwo:
  66. Z: {$filter: {
  67. input: {$objectToArray: "$data"},
  68. cond: {$in: ['$$this.k', ['statusOne','statusTwo']]}
  69. }}
  70. Or only looking for keys that start with 'status':
  71. Z: {$filter: {
  72. input: {$objectToArray: "$data"},
  73. cond: {$eq: ['status', {$substr:['$$this.k',0,6]}]}
  74. }}
  75. Let's keep it simple now and take the whole thing
  76. without any filtering.
  77. */
  78. Z: {$objectToArray: "$data"}
  79. }}
  80. ,{$unwind: "$Z"}
  81. // Now we have docs like this:
  82. //{
  83. // _id: ObjectId("64d651283d8bc34d3928366d"),
  84. // name: 'another-test',
  85. // Z: {
  86. // k: 'statusOne',
  87. // v: 'disabled'
  88. // }
  89. //}
  90. // Now it is just a matter of grouping and reorganizing:
  91. ,{$group: {_id: {"name":"$name", "v":"$Z.v"}, N:{$sum:1}}}
  92. ,{$group: {_id: "$_id.name", data: {$push: {status:"$_id.v", count:"$N"}} }}
  93. ]);


  1. {
  2. _id: 'test',
  3. data: [
  4. {
  5. status: 'active',
  6. count: 2
  7. },
  8. {
  9. status: 'enabled',
  10. count: 1
  11. },
  12. {
  13. status: 'disabled',
  14. count: 1
  15. }
  16. ]
  17. }
  18. {
  19. _id: 'another-test',
  20. data: [
  21. {
  22. status: 'disabled',
  23. count: 1
  24. },
  25. {
  26. status: 'active',
  27. count: 1
  28. }
  29. ]
  30. }

What if there are thousands of tests and we don't want to $unwind?

You must be careful when throwing $unwind into a pipeline. If the average number of statuses per test gets large (say, 100) then there will be a LOT of docs in the pipeline. Below is an alternate solution that exploits $reduce:

  1. db.foo.aggregate([
  2. {$project: {
  3. name: true,
  4. Z: {$objectToArray: "$data"}
  5. }}
  6. /*
  7. Instead of $unwind and using $group-$sum to count things, let's
  8. do it ourselves. We will overwrite Z with a new Z.
  9. Also, since this is an interim step, use shorter variable names for
  10. clarity.
  11. This if-then-else construction basically says:
  12. For each status type, status_type_count += 1
  13. This permits the input to have more than 1 status of the same type, e.g.
  14. "name": "test",
  15. "data": {
  16. "statusOne": "enabled",
  17. "statusTwo": "enabled"
  18. }
  19. In MQL in a $reduce loop, we don't say
  20. object.key = object.key + 1
  21. Instead we say:
  22. {$mergeObjects: [ "$$value", {key: {$add:["$$value.key",1]}} ]}
  23. */
  24. ,{$addFields: {Z: {$reduce: {
  25. input: "$Z",
  26. initialValue: {"A":0,"E":0,"D":0},
  27. in: {$cond: {
  28. if: {$eq:["$$this.v","active"]},
  29. then: {$mergeObjects: [ "$$value", {"A": {$add:["$$value.A",1]}} ]},
  30. else: {$cond: {
  31. if: {$eq:["$$this.v","disabled"]},
  32. then: {$mergeObjects: [ "$$value", {"D": {$add:["$$value.D",1]}} ]},
  33. else: {$cond: {
  34. if: {$eq:["$$this.v","enabled"]},
  35. then: {$mergeObjects: [ "$$value", {"E": {$add:["$$value.E",1]}} ]},
  36. else: "$$value"
  37. }}
  38. }}
  39. }}
  40. }}
  41. }}
  42. // Bring the names together and collect the counts:
  43. ,{$group: {
  44. _id: "$name", X: {$push: "$Z"}
  45. }}
  46. // Now, run a $reduce again to sum the counts AND put back the
  47. // big variable names:
  48. ,{$project: {data: {$reduce: {
  49. input: "$X",
  50. initialValue: {"active":0,"enabled":0,"disabled":0},
  51. in: {"active": {$add:["$$value.active","$$this.A"]},
  52. "disabled": {$add:["$$value.disabled","$$this.D"]},
  53. "enabled": {$add:["$$value.enabled","$$this.E"]}
  54. }
  55. }}
  56. }}
  57. // At this point we are "done" information-wise but the OP was
  58. // looking for an array of status as an RVAL not a key (e.g. "A")
  59. // so post-process:
  60. ,{$project: {
  61. _id:0,
  62. name:"$_id",
  63. data: {$map: {
  64. input: {$objectToArray: "$data"},
  65. in: {
  66. "status":"$$this.k",
  67. "count":"$$this.v",
  68. }
  69. }}
  70. }}
  71. ]);

  • 本文由 发表于 2023年8月10日 20:11:21
  • 转载请务必保留本文链接:



:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:
