英文:
squaring a very large BigInteger is very slow
问题
我正在解决2022年的Advent of Code问题,目前正在处理第11天第2部分。
我有这个解决方案:
(ns advent-of-code-2022.day11
(:require [taoensso.timbre :as log]))
(declare give-monkey-item)
(defrecord Monkey [items operation throw-item num-inspections])
(defn create-monkey [items operation divisor monkey-true monkey-false]
(->Monkey
items
operation
(fn [item monkeys]
(if (= (mod item divisor) 0)
(give-monkey-item item monkeys monkey-true)
(give-monkey-item item monkeys monkey-false)))
0))
(defn give-monkey-item [item monkeys catcher]
(update-in monkeys [catcher :items] #(conj % item)))
(defn throw-items [monkeys current-monkey]
(reduce (fn [monkeys item]
(-> item
(biginteger)
((.operation current-monkey))
((.-throw_item current-monkey) monkeys)))
monkeys
(.items current-monkey)))
(defn adjust-current-monkeys-inspections [monkeys current-monkey-index]
(let [current-monkey (get monkeys current-monkey-index)
num-items (count (.items current-monkey))]
(update-in monkeys [current-monkey-index :num-inspections] #(+ % num-items))))
(defn clear-current-monkeys-items [monkeys current-monkey-index]
(let [current-monkey (get monkeys current-monkey-index)
itemless-monkey (assoc current-monkey :items [])]
(assoc monkeys current-monkey-index itemless-monkey)))
(defn play-turn [monkeys current-monkey-index]
(let [current-monkey (get monkeys current-monkey-index)]
(-> monkeys
(throw-items current-monkey)
(adjust-current-monkeys-inspections current-monkey-index)
(clear-current-monkeys-items current-monkey-index))))
(defn play-round [monkeys]
(reduce (fn [monkeys monkey-index]
(play-turn monkeys monkey-index))
monkeys
(range (count monkeys))))
(defn play-rounds [monkeys num-rounds]
(reduce (fn [rounds round-number]
(log/info round-number)
(let [latest-monkeys (last rounds)]
(conj rounds (play-round latest-monkeys)))
)
[monkeys]
(range num-rounds)))
(defn calculate-monkey-business [monkeys num-rounds]
(let [rounds (play-rounds monkeys num-rounds)]
(->> (last rounds)
(map #(.-num_inspections %))
(sort)
(reverse)
(take 2)
(reduce *))))
我正在使用以下代码进行测试:
(ns advent-of-code-2022.day11-test
(:require [clojure.test :refer :all]
[advent-of-code-2022.day11 :refer :all]))
(def monkeys
[(create-monkey
[54 98 50 94 69 62 53 85]
(fn [old] (* old 13))
3 2 1)
(create-monkey
[71 55 82]
(fn [old] (+ old 2))
13 7 2)
(create-monkey
[77 73 86 72 87]
(fn [old] (+ old 8))
19 4 7)
(create-monkey
[97 91]
(fn [old] (+ old 1))
17 6 5)
(create-monkey
[78 97 51 85 66 63 62]
(fn [old] (* old 17))
5 6 3)
(create-monkey
[88]
(fn [old] (+ old 3))
7 1 0)
(create-monkey
[87 57 63 86 87 53]
(fn [old] (.pow old 2)) ; this is slow for big numbers
11 5 0)
(create-monkey
[73 59 82 65]
(fn [old] (+ old 6))
2 4 3)
])
(deftest day11-full-test-part2
(testing "day11-full-test-part2"
(is (= (calculate-monkey-business monkeys 10000) 10605))))
这个代码尝试运行10000次迭代,但在第200次迭代左右,测试代码中标记的函数 (fn [old] (.pow old 2))
开始对非常大的BigIntegers进行平方运算,并变得非常慢。它如此慢以至于程序可能需要几天才能完成。如果我将 .pow
替换为简单的 +
,程序将在几秒内完成。 .pow
是我尝试过的最新函数;我从一个简单的 (* old old)
开始,尝试了 clojure.math.numeric-tower/expt
,但这三者都有这个问题。
是否有一种有效的方式在Clojure中对这些大的BigIntegers进行平方运算?
英文:
I'm working through 2022's Advent of Code and I'm on Day 11 part 2.
I have this solution:
(ns advent-of-code-2022.day11
(:require [taoensso.timbre :as log]))
(declare give-monkey-item)
(defrecord Monkey [items operation throw-item num-inspections])
(defn create-monkey [items operation divisor monkey-true monkey-false]
(->Monkey
items
operation
(fn [item monkeys]
(if (= (mod item divisor) 0)
(give-monkey-item item monkeys monkey-true)
(give-monkey-item item monkeys monkey-false)))
0))
(defn give-monkey-item [item monkeys catcher]
(update-in monkeys [catcher :items] #(conj % item)))
(defn throw-items [monkeys current-monkey]
(reduce (fn [monkeys item]
(-> item
(biginteger)
((.operation current-monkey))
((.-throw_item current-monkey) monkeys)))
monkeys
(.items current-monkey)))
(defn adjust-current-monkeys-inspections [monkeys current-monkey-index]
(let [current-monkey (get monkeys current-monkey-index)
num-items (count (.items current-monkey))]
(update-in monkeys [current-monkey-index :num-inspections] #(+ % num-items))))
(defn clear-current-monkeys-items [monkeys current-monkey-index]
(let [current-monkey (get monkeys current-monkey-index)
itemless-monkey (assoc current-monkey :items [])]
(assoc monkeys current-monkey-index itemless-monkey)))
(defn play-turn [monkeys current-monkey-index]
(let [current-monkey (get monkeys current-monkey-index)]
(-> monkeys
(throw-items current-monkey)
(adjust-current-monkeys-inspections current-monkey-index)
(clear-current-monkeys-items current-monkey-index))))
(defn play-round [monkeys]
(reduce (fn [monkeys monkey-index]
(play-turn monkeys monkey-index))
monkeys
(range (count monkeys))))
(defn play-rounds [monkeys num-rounds]
(reduce (fn [rounds round-number]
(log/info round-number)
(let [latest-monkeys (last rounds)]
(conj rounds (play-round latest-monkeys))))
[monkeys]
(range num-rounds)))
(defn calculate-monkey-business [monkeys num-rounds]
(let [rounds (play-rounds monkeys num-rounds)]
(->> (last rounds)
(map #(.-num_inspections %))
(sort)
(reverse)
(take 2)
(reduce *))))
I'm testing it using this code:
(ns advent-of-code-2022.day11-test
(:require [clojure.test :refer :all]
[advent-of-code-2022.day11 :refer :all]))
(def monkeys
[(create-monkey
[54 98 50 94 69 62 53 85]
(fn [old] (* old 13))
3 2 1)
(create-monkey
[71 55 82]
(fn [old] (+ old 2))
13 7 2)
(create-monkey
[77 73 86 72 87]
(fn [old] (+ old 8))
19 4 7)
(create-monkey
[97 91]
(fn [old] (+ old 1))
17 6 5)
(create-monkey
[78 97 51 85 66 63 62]
(fn [old] (* old 17))
5 6 3)
(create-monkey
[88]
(fn [old] (+ old 3))
7 1 0)
(create-monkey
[87 57 63 86 87 53]
(fn [old] (.pow old 2)) ; this is slow for big numbers
11 5 0)
(create-monkey
[73 59 82 65]
(fn [old] (+ old 6))
2 4 3)
])
(deftest day11-full-test-part2
(testing "day11-full-test-part2"
(is (= (calculate-monkey-business monkeys 10000) 10605))))
This tries to run for 10000 iterations, but around the 200th iteration the marked function (fn [old] (.pow old 2))
in the test code starts squaring very large BigIntegers and gets extremely slow. It is so slow that the program would probably take a few days to finish. If I replace .pow
with a simple +
, the program completes in a few seconds. .pow
is the latest function I've tried; I started with a simple (* old old)
and tried clojure.math.numeric-tower/expt
as well, but all three have this problem.
Is there a way to square these large BigIntegers efficiently in Clojure?
答案1
得分: 8
Alan Thompson提供了一些建议,可以加快你的程序运行速度一点。我怀疑它不会达到1000倍的速度提升,对于小数字而言,与未经提示的.pow
相比,速度可能更接近100倍,与(* x x)
相比可能是2倍,但即使如此,这还远远不足以挽救你。事实上,正如我将在接下来展示的,速度并不是唯一的限制因素!
你提到在10000次迭代中的第200次左右程序变得很慢,并预测需要几天的运行时间。但你正在看一个具有指数运行时间的过程,人们往往极大地低估了指数公式会变得多么庞大。因此,我预测你将需要真正的算法更改,而不仅仅是一些要撒在现有算法上的仙尘。
为什么我说它是指数性的?嗯,你已经观察到占主导地位的因素是形如 x = x * x
的操作:对一个数字进行平方然后继续。当然,由于所有的花里胡哨,并不是每一轮都涉及对所有数字进行平方,但这将在使你的数字变得很大方面占主导地位。想象一下,只写 x = x * x
这个动作进行了一万轮,从一个无害的数字2开始。你对2进行平方(2^1),然后对结果4进行平方(2^2),然后对结果16进行平方(2^4),然后对结果256进行平方(2^8),依此类推,平方一万次。这个过程计算了2^(2^10000):显然是指数性的(事实上,是超指数性的,因为指数本身呈指数增长)。
这为什么是个问题?嗯,在你的计算机内存中存储类似2^(2^10000)的精确二进制表示根本不可能。在整个宇宙中存储这样一个数的精确二进制表示也是不可能的:根本没有足够的原子来构成所有的位。当然,我过高地估计了每轮都涉及对数字进行平方;但你可以期望大约20%的数字在每轮中被平方,因为测试可被5整除的猴子会传递给进行平方运算的猴子。这仍然远远超出了可能性的范围。
因此,任何使用BigInteger来存储这个数字的精确表示的方法都是行不通的。幸运的是,有一些简单的改进可以让你执行这个模拟的速度飞快。我不想透露太多细节,因为其中很多乐趣就是自己解决问题。以下是一些提示,分别包含在 spoilers 中。
! 你不需要在过程结束时得到精确的数值本身。
! 在过程中,你也不需要精确的数值。
! 你只需要知道它们是否可被一些质数整除。
! 是不是很好,他们从来没有要求你除以什么?
! 你考虑过模算术吗?
英文:
Alan Thompson has some advice that will speed up your program a little. I doubt it will be as much as 1000x - for small numbers, it would probably be more like 100x compared to un-hinted .pow
, and 2x compared to (* x x)
- but even if it were, this will not be close to enough of a speedup to save you. In fact, as I will show in a moment, speed is not your only limiting factor!
You note that things get slow around the 200th iteration out of 10000, and predict a few days' running time. But you're looking at a process with exponential running time, and people tend to grossly underestimate how big an exponential formula will get. So I predict you will need a real algorithmic change, not just some fairy dust to sprinkle on the existing algorithm.
Why do I say it's exponential? Well, you have observed that the dominating factor is operations of the form x = x * x
: squaring a number and then moving on. Of course because of all the monkey business not every round will involve squaring all the numbers, but that's what will dominate in making your numbers large. Imagine ten thousand rounds of just writing x = x * x
, starting with an innocuous number like 2. You square 2 (2^1), then square the resulting 4 (2^2), then square the resulting 16 (2^4), then square the resulting 256 (2^8), and so on, squaring ten thousand times. That process computes 2^(2^10000): clearly exponential (in fact, superexponential, because the exponent itself grows exponentially).
Why is this a problem? Well, storing the exact binary representation of a number like 2^(2^10000) in your computer's memory is simply impossible. Storing such a number's exact binary representation anywhere in the entire universe is impossible: there's not nearly enough atoms to make up all the bits. Of course, I've overestimated by assuming that each round involves squaring the number; but you can expect around 20% of the numbers to be squared each round, because the monkey who tests for divisibility by 5 throws to the monkey who squares. This is still far outside the realm of possibility.
So any approach that uses a BigInteger to store the exact representation of this number is unworkable. Fortunately, there are some easy improvements to make that will let you perform this simulation lightning fast. I don't want to give away the whole thing, since much of the fun is solving the problems yourself. Here are a few hints, spoilered individually.
> ! You don't need the exact values themselves at the end of the process.
> ! You don't need the exact values during the process either
> ! All you need is to tell whether they're divisible by some primes
> ! Isn't it rather nice they never ask you to divide by something?
> ! Have you considered modular arithmetic?
答案2
得分: 1
以下是已翻译的内容:
尝试在代码中使用类型提示,以防您的代码使用反射来查找正确的`.pow`方法运行。它可以使速度提高1000倍:
(fn [old] (.pow ^BigInteger old 2))
您还可以设置:
(set! *warn-on-reflection* true)
这将在需要反射时打印警告,例如:
```pre
反射警告,tst/demo/core.clj:9:23 - 无法解析对方法pow的调用(目标类未知)。
这是一个示例计时:
(ns tst.demo.core
(:use demo.core tupelo.core tupelo.test)
(:require
[tupelo.math :as math]
))
(set! *warn-on-reflection* true)
(defn math-pow-plain [arg] (Math/pow arg 2.0))
(defn math-pow-hint [arg] (Math/pow ^Double arg 2.0))
(defn pow-plain [arg] (.pow arg 2))
(defn pow-hint [arg] (.pow ^BigInteger arg 2))
(def two-bi (biginteger 2))
(defn mult-hint [arg] (.multiply ^BigInteger arg arg ))
(verify
(let
(print "math-pow-plain ")
(time
(dotimes [i N]
(math-pow-plain base-dbl)))
(print "math-pow-hint ")
(time
(dotimes [i N]
(math-pow-hint base-dbl)))
(newline)
(print "pow-plain ")
(time
(dotimes [i N]
(pow-plain base-bi)))
(print "pow-hint ")
(time
(dotimes [i N]
(pow-hint base-bi)))
(print "mult-hint ")
(time
(dotimes [i N]
(mult-hint base-bi)))
)
)
其中的结果:
*********************************************
*************** Running tests ***************
:reloading ()
Testing tst._bootstrap
-----------------------------------
Clojure 1.11.1 Java 19.0.2
-----------------------------------
Testing tst.demo.core
math-pow-plain "Elapsed time: 0.661787 msecs"
math-pow-hint "Elapsed time: 0.66835 msecs"
pow-plain "Elapsed time: 1939.324816 msecs"
pow-hint "Elapsed time: 68.920437 msecs"
mult-hint "Elapsed time: 19.201765 msecs"
Ran 2 tests containing 0 assertions.
0 failures, 0 errors.
Passed all tests
Finished at 23:17:34.914 (run time: 2.038s)
使用我的最爱的模板项目构建。
我们可以使用Criterium获得更精确的结果:
(ns tst.demo.core
(:use demo.core tupelo.core tupelo.test)
(:require
[criterium.core :as crit]
[tupelo.math :as math]
))
<snip>
(newline)
(prn :-----------------------------------------------------------------------------)
(println "math-pow-plain ")
(crit/quick-bench
(math-pow-plain base-dbl))
(newline)
(prn :-----------------------------------------------------------------------------)
(println "math-pow-hint ")
(crit/quick-bench
(math-pow-hint base-dbl))
(newline)
(prn :-----------------------------------------------------------------------------)
(println "pow-plain ")
(crit/quick-bench
(pow-plain base-bi))
(newline)
(prn :-----------------------------------------------------------------------------)
(println "pow-hint ")
(crit/quick-bench
(pow-hint base-bi))
(newline)
(prn :-----------------------------------------------------------------------------)
(println "mult-hint ")
(crit/quick-bench
(mult-hint base-bi))
其结果为:
:-----------------------------------------------------------------------------
math-pow-plain
Evaluation count : 54186174 in 6 samples of 9031029 calls.
Execution time mean : 5.959432 ns
Execution time std-deviation : 2.325416 ns
Execution time lower quantile : 4.152295 ns ( 2.5%)
Execution time upper quantile : 9.127577 ns (97.5%)
Overhead used : 6.671950 ns
:-----------------------------------------------------------------------------
math-pow-hint
Evaluation count : 54730104 in 6 samples of 9121684 calls.
Execution time mean : 6.400439 ns
Execution time std-deviation : 2.262171 ns
Execution time lower quantile : 4.824092 ns ( 2.5%)
Execution time upper quantile : 10.093095 ns (97.5%)
Overhead used : 6.671950 ns
Found 1 outliers in 6 samples (16.6667 %)
low-severe 1 (16.6667 %)
Variance from outliers : 81.8856 % Variance is severely inflated by outliers
以及
:-----------------------------------------------------------------------------
pow-plain
Evaluation count : 290466 in 6 samples of 48411 calls.
Execution time mean : 2.445839 µs
Execution time std-deviation : 326.407836 ns
Execution time lower quantile : 2.121704 µs ( 2.5%)
Execution time upper quantile : 2.838628 µs (97.5%)
Overhead used : 6.671950 ns
:-----------------------------------------------------------------------------
pow-hint
Evaluation count : 7890228 in 6 samples of 1315038 calls.
Execution time mean : 79.168431 ns
Execution time std-deviation : 10.776154 ns
Execution time lower quantile : 71.917202 ns ( 2.5%)
Execution time upper quantile : 96.642962 ns (97.5%)
Overhead used : 6.671950 ns
Found 1 outliers in 6 samples (16.6667 %)
low-severe 1 (16.6667 %)
Variance from outliers : 31.7716 % Variance is moderately inflated by outliers
:-----------------------------------------------------------------------------
mult-hint
Evaluation count : 24351408 in 6 samples of
<details>
<summary>英文:</summary>
Try a type-hint in case your code is using reflection to find the correct `.pow` method to run. It can make a 1000x difference in speed:
(fn [old] (.pow ^BigInteger old 2))
You can also set this:
(set! *warn-on-reflection* true)
which will print a warning when reflection is required, for example:
```pre
Reflection warning, tst/demo/core.clj:9:23 - call to method pow can't be resolved (target class is unknown).
Here is an example timing:
(ns tst.demo.core
(:use demo.core tupelo.core tupelo.test)
(:require
[tupelo.math :as math]
))
(set! *warn-on-reflection* true)
(defn math-pow-plain [arg] (Math/pow arg 2.0))
(defn math-pow-hint [arg] (Math/pow ^Double arg 2.0))
(defn pow-plain [arg] (.pow arg 2))
(defn pow-hint [arg] (.pow ^BigInteger arg 2))
(def two-bi (biginteger 2))
(defn mult-hint [arg] (.multiply ^BigInteger arg arg ))
(verify
(let
(print "math-pow-plain ")
(time
(dotimes [i N]
(math-pow-plain base-dbl)))
(print "math-pow-hint ")
(time
(dotimes [i N]
(math-pow-hint base-dbl)))
(newline)
(print "pow-plain ")
(time
(dotimes [i N]
(pow-plain base-bi)))
(print "pow-hint ")
(time
(dotimes [i N]
(pow-hint base-bi)))
(print "mult-hint ")
(time
(dotimes [i N]
(mult-hint base-bi)))
)
)
with results:
*********************************************
*************** Running tests ***************
:reloading ()
Testing tst._bootstrap
-----------------------------------
Clojure 1.11.1 Java 19.0.2
-----------------------------------
Testing tst.demo.core
math-pow-plain "Elapsed time: 0.661787 msecs"
math-pow-hint "Elapsed time: 0.66835 msecs"
pow-plain "Elapsed time: 1939.324816 msecs"
pow-hint "Elapsed time: 68.920437 msecs"
mult-hint "Elapsed time: 19.201765 msecs"
Ran 2 tests containing 0 assertions.
0 failures, 0 errors.
Passed all tests
Finished at 23:17:34.914 (run time: 2.038s)
Build using my favorite template project.
We can get more accurate results using Criterium:
(ns tst.demo.core
(:use demo.core tupelo.core tupelo.test)
(:require
[criterium.core :as crit]
[tupelo.math :as math]
))
<snip>
(newline)
(prn :-----------------------------------------------------------------------------)
(println "math-pow-plain ")
(crit/quick-bench
(math-pow-plain base-dbl))
(newline)
(prn :-----------------------------------------------------------------------------)
(println "math-pow-hint ")
(crit/quick-bench
(math-pow-hint base-dbl))
(newline)
(prn :-----------------------------------------------------------------------------)
(println "pow-plain ")
(crit/quick-bench
(pow-plain base-bi))
(newline)
(prn :-----------------------------------------------------------------------------)
(println "pow-hint ")
(crit/quick-bench
(pow-hint base-bi))
(newline)
(prn :-----------------------------------------------------------------------------)
(println "mult-hint ")
(crit/quick-bench
(mult-hint base-bi))
with results:
:-----------------------------------------------------------------------------
math-pow-plain
Evaluation count : 54186174 in 6 samples of 9031029 calls.
Execution time mean : 5.959432 ns
Execution time std-deviation : 2.325416 ns
Execution time lower quantile : 4.152295 ns ( 2.5%)
Execution time upper quantile : 9.127577 ns (97.5%)
Overhead used : 6.671950 ns
:-----------------------------------------------------------------------------
math-pow-hint
Evaluation count : 54730104 in 6 samples of 9121684 calls.
Execution time mean : 6.400439 ns
Execution time std-deviation : 2.262171 ns
Execution time lower quantile : 4.824092 ns ( 2.5%)
Execution time upper quantile : 10.093095 ns (97.5%)
Overhead used : 6.671950 ns
Found 1 outliers in 6 samples (16.6667 %)
low-severe 1 (16.6667 %)
Variance from outliers : 81.8856 % Variance is severely inflated by outliers
and
:-----------------------------------------------------------------------------
pow-plain
Evaluation count : 290466 in 6 samples of 48411 calls.
Execution time mean : 2.445839 µs
Execution time std-deviation : 326.407836 ns
Execution time lower quantile : 2.121704 µs ( 2.5%)
Execution time upper quantile : 2.838628 µs (97.5%)
Overhead used : 6.671950 ns
:-----------------------------------------------------------------------------
pow-hint
Evaluation count : 7890228 in 6 samples of 1315038 calls.
Execution time mean : 79.168431 ns
Execution time std-deviation : 10.776154 ns
Execution time lower quantile : 71.917202 ns ( 2.5%)
Execution time upper quantile : 96.642962 ns (97.5%)
Overhead used : 6.671950 ns
Found 1 outliers in 6 samples (16.6667 %)
low-severe 1 (16.6667 %)
Variance from outliers : 31.7716 % Variance is moderately inflated by outliers
:-----------------------------------------------------------------------------
mult-hint
Evaluation count : 24351408 in 6 samples of 4058568 calls.
Execution time mean : 22.017819 ns
Execution time std-deviation : 4.678650 ns
Execution time lower quantile : 18.117006 ns ( 2.5%)
Execution time upper quantile : 29.207411 ns (97.5%)
Overhead used : 6.671950 ns
Ran 2 tests containing 0 assertions.
0 failures, 0 errors.
So we see that math-pow-plain
and math-pow-hint
are both 6 ns (nanoseconds) per call for Double values. For BigIntegers, pow-plain
is about 2400 ns/call compared to 80 ns/call for pow-hint
, about a 30x penalty. Using mult-hint
is about 22 ns/call about a 4x speedup over BigInteger/pow
. Not nothing, and may be worthwhile in a tight loop. Only timing can tell.
Note that you can help to trace down slow parts of your program using the tupelo/profile
library.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论