是的,可以将这段代码的一部分转换以提高速度吗?

huangapple go评论107阅读模式
英文:

Is it possible to convert part of this code to improve its speed?

问题

I am new here,

Please, I need help converting part of this code into something faster, while keeping it compatible with the Python programs that use this function. Is it possible to improve its speed?

On my system, a 224x224 color image takes 360 milliseconds to process.

  1. #!/usr/bin/env python3
  2. # coding: utf-8
  3. # original code https://github.com/verhovsky/squircle/blob/master/squircle.py
  4. import cv2
  5. import math
  6. import time
  7. import numpy
  8. _epsilon = 0.0000000001
  9. def _sgn(x):
  10. if x == 0.0:
  11. return 0.0
  12. if x < 0:
  13. return -1.0
  14. return 1.0
  15. def _pixel_coordinates_to_unit(coordinate, max_value):
  16. return coordinate / max_value * 2 - 1
  17. def _one_coordinates_to_pixels(coordinate, max_value):
  18. return (coordinate + 1) / 2 * max_value
  19. def _stretch_square_to_disc(x, y):
  20. if (abs(x) < _epsilon) or (abs(y) < _epsilon):
  21. return x, y
  22. x2 = x * x
  23. y2 = y * y
  24. hypotenuse_squared = x * x + y * y
  25. reciprocal_hypotenuse = 1.0 / math.sqrt(hypotenuse_squared)
  26. multiplier = 1.0
  27. if x2 > y2:
  28. multiplier = _sgn(x) * x * reciprocal_hypotenuse
  29. else:
  30. multiplier = _sgn(y) * y * reciprocal_hypotenuse
  31. return x * multiplier, y * multiplier
  32. def _transform(inp):
  33. result = numpy.zeros_like(inp)
  34. for x, row in enumerate(inp):
  35. unit_x = _pixel_coordinates_to_unit(x, len(inp))
  36. for y, _ in enumerate(row):
  37. unit_y = _pixel_coordinates_to_unit(y, len(row))
  38. try:
  39. uv = _stretch_square_to_disc(unit_x, unit_y)
  40. if uv is None:
  41. continue
  42. u, v = uv
  43. u = _one_coordinates_to_pixels(u, len(inp))
  44. v = _one_coordinates_to_pixels(v, len(row))
  45. result[x][y] = inp[math.floor(u)][math.floor(v)]
  46. except IndexError:
  47. pass
  48. return result
  49. # -- load and test
  50. img = cv2.imread('circle.png')
  51. elapsed = round(time.time() * 1000)
  52. squareImage = _transform(img[0:224, 0:224])
  53. print(str(round(time.time() * 1000) - elapsed)+' ms to squareImage')
  54. cv2.imshow('square', squareImage)
  55. key = cv2.waitKey(0)
  56. cv2.destroyAllWindows()

I was hoping to convert this code, or part of it, into something faster: maybe CUDA to run directly on the GPU, or Numba, Cython, a library, etc.

英文:

I am new here,

Please, I need help converting part of this code into something faster, while keeping it compatible with the Python programs that use this function. Is it possible to improve its speed?

On my system, a 224x224 color image takes 360 milliseconds to process.

  1. #!/usr/bin/env python3
  2. # coding: utf-8
  3. # original code https://github.com/verhovsky/squircle/blob/master/squircle.py
  4. import cv2
  5. import math
  6. import time
  7. import numpy
  8. _epsilon = 0.0000000001
  9. def _sgn(x):
  10. if x == 0.0:
  11. return 0.0
  12. if x &lt; 0:
  13. return -1.0
  14. return 1.0
  15. def _pixel_coordinates_to_unit(coordinate, max_value):
  16. return coordinate / max_value * 2 - 1
  17. def _one_coordinates_to_pixels(coordinate, max_value):
  18. return (coordinate + 1) / 2 * max_value
  19. def _stretch_square_to_disc(x, y):
  20. if (abs(x) &lt; _epsilon) or (abs(y) &lt; _epsilon):
  21. return x, y
  22. x2 = x * x
  23. y2 = y * y
  24. hypotenuse_squared = x * x + y * y
  25. reciprocal_hypotenuse = 1.0 / math.sqrt(hypotenuse_squared)
  26. multiplier = 1.0
  27. if x2 &gt; y2:
  28. multiplier = _sgn(x) * x * reciprocal_hypotenuse
  29. else:
  30. multiplier = _sgn(y) * y * reciprocal_hypotenuse
  31. return x * multiplier, y * multiplier
  32. def _transform(inp):
  33. result = numpy.zeros_like(inp)
  34. for x, row in enumerate(inp):
  35. unit_x = _pixel_coordinates_to_unit(x, len(inp))
  36. for y, _ in enumerate(row):
  37. unit_y = _pixel_coordinates_to_unit(y, len(row))
  38. try:
  39. uv = _stretch_square_to_disc(unit_x, unit_y)
  40. if uv is None:
  41. continue
  42. u, v = uv
  43. u = _one_coordinates_to_pixels(u, len(inp))
  44. v = _one_coordinates_to_pixels(v, len(row))
  45. result[x][y] = inp[math.floor(u)][math.floor(v)]
  46. except IndexError:
  47. pass
  48. return result
  49. # -- load and test
  50. img = cv2.imread(&#39;circle.png&#39;)
  51. elapsed = round(time.time() * 1000)
  52. squareImage = _transform(img[0:224, 0:224])
  53. print(str(round(time.time() * 1000) - elapsed)+&#39; ms to squareImage&#39;)
  54. cv2.imshow(&#39;square&#39;, squareImage)
  55. key = cv2.waitKey(0)
  56. cv2.destroyAllWindows()

I was hoping to convert this code, or part of it, into something faster: maybe CUDA to run directly on the GPU, or Numba, Cython, a library, etc.

答案1

得分: 2

我使用numba装饰了这些函数(删除了_sgn,使用了np.sign,删除了try..except - 是否需要?):

  1. import math
  2. import time
  3. import cv2
  4. import numpy
  5. from numba import njit
  6. _epsilon = 0.0000000001
  7. @njit
  8. def _pixel_coordinates_to_unit(coordinate, max_value):
  9. return coordinate / max_value * 2 - 1
  10. @njit
  11. def _one_coordinates_to_pixels(coordinate, max_value):
  12. return (coordinate + 1) / 2 * max_value
  13. @njit
  14. def _stretch_square_to_disc(x, y):
  15. if (abs(x) < _epsilon) or (abs(y) < _epsilon):
  16. return x, y
  17. x2 = x * x
  18. y2 = y * y
  19. hypotenuse_squared = x * x + y * y
  20. reciprocal_hypotenuse = 1.0 / np.sqrt(hypotenuse_squared)
  21. multiplier = 1.0
  22. if x2 > y2:
  23. multiplier = np.sign(x) * x * reciprocal_hypotenuse
  24. else:
  25. multiplier = np.sign(y) * y * reciprocal_hypotenuse
  26. return x * multiplier, y * multiplier
  27. @njit
  28. def _transform(inp):
  29. result = numpy.zeros_like(inp)
  30. for x, row in enumerate(inp):
  31. unit_x = _pixel_coordinates_to_unit(x, len(inp))
  32. for y, _ in enumerate(row):
  33. unit_y = _pixel_coordinates_to_unit(y, len(row))
  34. uv = _stretch_square_to_disc(unit_x, unit_y)
  35. if uv is None:
  36. continue
  37. u, v = uv
  38. u = _one_coordinates_to_pixels(u, len(inp))
  39. v = _one_coordinates_to_pixels(v, len(row))
  40. result[x][y] = inp[math.floor(u)][math.floor(v)]
  41. return result
  42. # -- load and test
  43. img = cv2.imread("circle.png")
  44. # warm jit
  45. # this is needed to let numba do the JIT optimizations
  46. # if you run the the function "cold", the running time will be larger
  47. # you can use compile-ahead-of-time
  48. # https://numba.pydata.org/numba-doc/dev/user/pycc.html
  49. squareImage = _transform(img[0:224, 0:224])
  50. elapsed = time.perf_counter_ns()
  51. squareImage = _transform(img[0:224, 0:224])
  52. print(str((time.perf_counter_ns() - elapsed) / 1000) + " us to squareImage")
  53. cv2.imwrite("shashed.png", squareImage)

在我的电脑上(AMD 5700X),它打印出:

  1. 528.596 us to squareImage
  2. # without using numba:
  3. # 47928.774 us to squareImage

使用的图像:

circle.png

是的,可以将这段代码的一部分转换以提高速度吗?

结果

是的,可以将这段代码的一部分转换以提高速度吗?

英文:

I've used numba to decorate the functions (removed the _sgn and used np.sign, removed the try..except - is it needed?):

  1. import math
  2. import time
  3. import cv2
  4. import numpy
  5. from numba import njit
  6. _epsilon = 0.0000000001
  7. @njit
  8. def _pixel_coordinates_to_unit(coordinate, max_value):
  9. return coordinate / max_value * 2 - 1
  10. @njit
  11. def _one_coordinates_to_pixels(coordinate, max_value):
  12. return (coordinate + 1) / 2 * max_value
  13. @njit
  14. def _stretch_square_to_disc(x, y):
  15. if (abs(x) &lt; _epsilon) or (abs(y) &lt; _epsilon):
  16. return x, y
  17. x2 = x * x
  18. y2 = y * y
  19. hypotenuse_squared = x * x + y * y
  20. reciprocal_hypotenuse = 1.0 / np.sqrt(hypotenuse_squared)
  21. multiplier = 1.0
  22. if x2 &gt; y2:
  23. multiplier = np.sign(x) * x * reciprocal_hypotenuse
  24. else:
  25. multiplier = np.sign(y) * y * reciprocal_hypotenuse
  26. return x * multiplier, y * multiplier
  27. @njit
  28. def _transform(inp):
  29. result = numpy.zeros_like(inp)
  30. for x, row in enumerate(inp):
  31. unit_x = _pixel_coordinates_to_unit(x, len(inp))
  32. for y, _ in enumerate(row):
  33. unit_y = _pixel_coordinates_to_unit(y, len(row))
  34. uv = _stretch_square_to_disc(unit_x, unit_y)
  35. if uv is None:
  36. continue
  37. u, v = uv
  38. u = _one_coordinates_to_pixels(u, len(inp))
  39. v = _one_coordinates_to_pixels(v, len(row))
  40. result[x][y] = inp[math.floor(u)][math.floor(v)]
  41. return result
  42. # -- load and test
  43. img = cv2.imread(&quot;circle.png&quot;)
  44. # warm jit
  45. # this is needed to let numba do the JIT optimizations
  46. # if you run the the function &quot;cold&quot;, the running time will be larger
  47. # you can use compile-ahead-of-time
  48. # https://numba.pydata.org/numba-doc/dev/user/pycc.html
  49. squareImage = _transform(img[0:224, 0:224])
  50. elapsed = time.perf_counter_ns()
  51. squareImage = _transform(img[0:224, 0:224])
  52. print(str((time.perf_counter_ns() - elapsed) / 1000) + &quot; us to squareImage&quot;)
  53. cv2.imwrite(&quot;shashed.png&quot;, squareImage)

On my computer (AMD 5700X) it prints:

  1. 528.596 us to squareImage
  2. # without using numba:
  3. # 47928.774 us to squareImage

Images used:

circle.png

是的,可以将这段代码的一部分转换以提高速度吗?

the result

是的,可以将这段代码的一部分转换以提高速度吗?


huangapple
  • 本文由 发表于 2023年8月5日 03:17:12
  • 转载请务必保留本文链接:https://go.coder-hub.com/76838644.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定