
huangapple go评论176阅读模式

How to put regexp inside proto struct in golang?




  1. // ProtoMessage is the struct generated by protoc (pseudo-code sketch of one tree node).
  2. type ProtoMessage struct {
  3. Data string
  4. Source string
  5. Regexp uint64 // should not be marshalled: proto.Marshal should always omit it from the payload; ideally it would be a *regexp.Regexp
  6. Left *ProtoMessage
  7. Right *ProtoMessage
  8. }
  // main is pseudo-code, not valid Go: it sketches the sender marshalling a node and the
  // receiver recompiling the regexp from Source and matching a line against the tree.
  9. func main() {
  10. // sender machine runs doSend():
  11. mSrc := &ProtoMessage{Data: "its meee!!!", Source: "hello.+world"}
  12. payload, _ := proto.Marshal(m)
  13. // receiver machine runs onRecv():
  14. mDst := new(ProtoMessage)
  15. proto.Unmarshal(payload, mDst)
  16. r, _ := regexp.Compile(mDst.Source)
  17. mDst.Regexp = uint64(unsafe.Pointer(r)) // not working, by the way (per the author)
  18. TreeMatch = func(tree *ProtoMessage, line string) string {
  19. if (*regexp.Regexp)(t.Regexp).Match(line) { // this line does not work (per the author); NOTE(review): `t` is presumably `tree`
  20. return t.Data
  21. }
  22. if tree.Left == nil {
  23. return ""
  24. }
  25. return TreeMatch(tree.Left, line)
  26. }
  27. assert(TreeMatch(mDst, "hello, world") == "its meee!!!") // panics if the condition is false
  28. }



I have a tree-like structure in which each node holds a regexp as a string, and I want the compiled *regexp.Regexp to be part of the node as well, so that I can run algorithms on the tree. When I marshal the tree and pass it to a different machine, I can simply recompile the regexp from the string. What is the correct way to do that? How can I make a protobuf-generated struct hold a pointer that, ideally, is never marshalled? (The only way I can see is to add a uint64 field and cast its value to/from *regexp.Regexp.)

Pseudo-code (because the features I need do not seem to exist in the language):

  1. // ProtoMessage is the struct generated by protoc (pseudo-code sketch of one tree node).
  2. type ProtoMessage struct {
  3. Data string
  4. Source string
  5. Regexp uint64 // should not be marshalled: proto.Marshal should always omit it from the payload; ideally it would be a *regexp.Regexp
  6. Left *ProtoMessage
  7. Right *ProtoMessage
  8. }
  // main is pseudo-code, not valid Go: it sketches the sender marshalling a node and the
  // receiver recompiling the regexp from Source and matching a line against the tree.
  9. func main() {
  10. // sender machine, doSend():
  11. mSrc := &ProtoMessage{Data:"its meee!!!", Source: "hello.+world"}
  12. payload, _ := proto.Marshal(m)
  13. // receiver machine, onRecv():
  14. mDst := new(ProtoMessage)
  15. proto.Unmarshal(payload, mDst)
  16. r, _ := regexp.Compile(mDst.Source)
  17. mDst.Regexp = uint64(unsafe.Pointer(r)) // not working, per the author
  18. TreeMatch = func(tree* ProtoMessage, line string) string {
  19. if *regexp.Regexp(t.Regexp).Match(line) { // the line the author reports as not working; NOTE(review): `t` is presumably `tree`
  20. return t.Data
  21. }
  22. if tree.Left == nil {
  23. return ""
  24. }
  25. return TreeMatch(tree.Left, line)
  26. }
  27. assert( TreeMatch(mDst, "hello, world") == "its meee!!!") // panics if the condition is false
  28. }

With JSON marshalling I can just put a pointer to the regexp in the struct and give it the tag json:"-" so that the field is not included in the marshalled output. And of course it is important for a marshalling/unmarshalling system to stay efficient (e.g. to use the same structure to run algorithms on, and to avoid copying data after unmarshalling). How can I do the same with protobuf?


得分: 2


  1. package main
  2. import (
  3. "fmt"
  4. "google.golang.org/protobuf/proto"
  5. "google.golang.org/protobuf/types/known/structpb"
  6. )
  // main marshals a structpb string Value that wraps the pattern text and prints the encoded bytes.
  7. func main() {
  8. v := structpb.NewStringValue("hello.+world") // the pattern travels as plain text
  9. b, err := proto.Marshal(v)
  10. if err != nil {
  11. panic(err)
  12. }
  13. fmt.Printf("%q\n", b) // "\x1a\fhello.+world"
  14. }


  1. package main
  2. import (
  3. "bytes"
  4. "encoding/gob"
  5. "regexp"
  6. )
  // main tries to gob-encode a compiled regexp and panics if the encoder returns an error;
  // the inline comment records the error message the author observed.
  7. func main() {
  8. re := regexp.MustCompile("hello.+world")
  9. buf := new(bytes.Buffer)
  10. if err := gob.NewEncoder(buf).Encode(re); err != nil {
  11. panic(err) // type regexp.Regexp has no exported fields
  12. }
  13. }

You can't store a pointer in a protobuf, because the recipient is likely to be a different computer. Even if you could, you'd get a panic as soon as you tried to dereference the pointer there. The easiest thing to do is to pass just the regexp string and compile it again at the destination:

  1. package main
  2. import (
  3. "fmt"
  4. "google.golang.org/protobuf/proto"
  5. "google.golang.org/protobuf/types/known/structpb"
  6. )
  // main marshals a structpb string Value that wraps the pattern text and prints the encoded bytes.
  7. func main() {
  8. v := structpb.NewStringValue("hello.+world") // the pattern travels as plain text
  9. b, err := proto.Marshal(v)
  10. if err != nil {
  11. panic(err)
  12. }
  13. fmt.Printf("%q\n", b) // "\x1a\fhello.+world"
  14. }

Note: you can't hack around this with Gob either:

  1. package main
  2. import (
  3. "bytes"
  4. "encoding/gob"
  5. "regexp"
  6. )
  // main tries to gob-encode a compiled regexp and panics if the encoder returns an error;
  // the inline comment records the error message the author observed.
  7. func main() {
  8. re := regexp.MustCompile("hello.+world")
  9. buf := new(bytes.Buffer)
  10. if err := gob.NewEncoder(buf).Encode(re); err != nil {
  11. panic(err) // type regexp.Regexp has no exported fields
  12. }
  13. }


得分: 1



  // Schema for the tree node used by the accompanying Go test code.
  1. syntax = "proto3";
  2. package main;
  3. option go_package = ".;main";
  // Empty has no fields; ProtoMessage.regexp uses it as its message type.
  4. message Empty {
  5. }
  // ProtoMessage is one tree node with optional left and right children.
  6. message ProtoMessage {
  7. string data = 1;
  8. string source = 2; // regexp pattern text; the Go code compiles it with regexp.Compile
  9. Empty regexp = 3; // ideally should not be marshalled at all, like `json:"-"` but for protobuf
  10. ProtoMessage left = 4;
  11. ProtoMessage right = 5;
  12. }


  1. package main
  2. import (
  3. "regexp"
  4. "testing"
  5. "unsafe"
  6. )
  // Empty stands in for the protoc-generated type of the field-less `Empty`
  // message. The generated bookkeeping fields are left commented out, so the
  // type is zero-sized in this example.
  type Empty struct {
      //state protoimpl.MessageState
      //sizeCache protoimpl.SizeCache
      //unknownFields protoimpl.UnknownFields
  }

  // ProtoMessage stands in for the protoc-generated struct of one tree node.
  //
  // Source holds the pattern text. Regexp is declared as *Empty, but it is
  // reused through SetCompiledRegexp/GetCompiledRegexp to carry the
  // *regexp.Regexp compiled from Source on the receiving side.
  //
  // NOTE(review): while Regexp carries a *regexp.Regexp, the protobuf runtime
  // would presumably treat that memory as an Empty message, so the field should
  // be reset to nil before the message is passed to proto.Marshal. Verify this
  // against the real generated code.
  type ProtoMessage struct {
      //state protoimpl.MessageState
      //sizeCache protoimpl.SizeCache
      //unknownFields protoimpl.UnknownFields
      Data   string        `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"`
      Source string        `protobuf:"bytes,2,opt,name=source,proto3" json:"source,omitempty"`
      Regexp *Empty        `protobuf:"bytes,3,opt,name=regexp,proto3" json:"regexp,omitempty"` // ideally should not be marshalled at all, like `json:"-"` but for protobuf
      Left   *ProtoMessage `protobuf:"bytes,4,opt,name=left,proto3" json:"left,omitempty"`
      Right  *ProtoMessage `protobuf:"bytes,5,opt,name=right,proto3" json:"right,omitempty"`
  }

  // GetCompiledRegexp returns the pattern stored by SetCompiledRegexp, or nil
  // when p is nil or no pattern has been stored.
  func (p *ProtoMessage) GetCompiledRegexp() *regexp.Regexp {
      if p == nil {
          return nil
      }
      // Reinterpret the *Empty slot as the *regexp.Regexp that was stored in it.
      return (*regexp.Regexp)(unsafe.Pointer(p.Regexp))
  }

  // SetCompiledRegexp stores r in the Regexp field by reinterpreting the pointer
  // as *Empty. Passing nil clears the field.
  func (p *ProtoMessage) SetCompiledRegexp(r *regexp.Regexp) {
      p.Regexp = (*Empty)(unsafe.Pointer(r))
  }

  // TreeMatch walks tree along its Left links and returns the Data of the first
  // node whose compiled regexp matches line, or "" when no node matches.
  // A nil tree and nodes without a compiled regexp are skipped instead of
  // causing a nil-pointer panic. Right links are not visited.
  func TreeMatch(tree *ProtoMessage, line string) string {
      for node := tree; node != nil; node = node.Left {
          // MatchString avoids copying line into a new []byte on every call.
          if re := node.GetCompiledRegexp(); re != nil && re.MatchString(line) {
              return node.Data
          }
      }
      return ""
  }

  // TestTreeMatch checks that TreeMatch reports a match only when the pattern
  // matches, and that it descends into Left past a node that has no pattern.
  func TestTreeMatch(t *testing.T) {
      // Happening on the receiver side: imagine proto.Unmarshal(payload, receiverMsg).
      receiverMsg := &ProtoMessage{
          Data:   "its meee!!!",
          Source: "hello.+world",
      }
      // MustCompile makes a bad pattern fail loudly instead of leaving a nil regexp behind.
      receiverMsg.SetCompiledRegexp(regexp.MustCompile(receiverMsg.Source))
      if TreeMatch(receiverMsg, "helloworld") != "" {
          t.Fatalf("TreeMatch gives non-existing match!")
      }
      if TreeMatch(receiverMsg, "hello, world") != "its meee!!!" {
          t.Fatalf("TreeMatch is not working!")
      }
      parent := &ProtoMessage{Data: "parent", Left: receiverMsg}
      if TreeMatch(parent, "hello, world") != "its meee!!!" {
          t.Fatalf("TreeMatch does not descend into Left!")
      }
  }

  // ProtoMessageDirect is the baseline for the benchmark: the same node shape,
  // but holding the compiled pattern in a properly typed field.
  type ProtoMessageDirect struct {
      Data   string
      Source string
      Regexp *regexp.Regexp
      Left   *ProtoMessageDirect
      Right  *ProtoMessageDirect
  }

  // GetCompiledRegexp returns the stored pattern, or nil when p is nil or no
  // pattern has been stored.
  func (p *ProtoMessageDirect) GetCompiledRegexp() *regexp.Regexp {
      if p == nil {
          return nil
      }
      return p.Regexp
  }

  // SetCompiledRegexp stores r as the node's compiled pattern.
  func (p *ProtoMessageDirect) SetCompiledRegexp(r *regexp.Regexp) {
      p.Regexp = r
  }

  // TreeMatchDirect is TreeMatch for ProtoMessageDirect: it follows Left links
  // and returns the Data of the first node whose pattern matches line, or "".
  // A nil tree and nodes without a pattern are skipped.
  func TreeMatchDirect(tree *ProtoMessageDirect, line string) string {
      for node := tree; node != nil; node = node.Left {
          if re := node.GetCompiledRegexp(); re != nil && re.MatchString(line) {
              return node.Data
          }
      }
      return ""
  }

  // matchSink receives benchmark results so the calls cannot be optimized away.
  var matchSink string

  // BenchmarkRegexpCast measures TreeMatch on a node whose pattern is stored
  // through the unsafe pointer cast.
  func BenchmarkRegexpCast(b *testing.B) {
      receiverMsg := &ProtoMessage{
          Data:   "its meee!!!",
          Source: "hello.+world",
      }
      receiverMsg.SetCompiledRegexp(regexp.MustCompile(receiverMsg.Source))
      b.ResetTimer()
      for i := 0; i < b.N; i++ {
          matchSink = TreeMatch(receiverMsg, "hello, world")
      }
  }

  // BenchmarkRegexpDirect measures TreeMatchDirect on a node that stores the
  // pattern in a typed field, as the baseline for BenchmarkRegexpCast.
  func BenchmarkRegexpDirect(b *testing.B) {
      receiverMsg := &ProtoMessageDirect{
          Data:   "its meee!!!",
          Source: "hello.+world",
      }
      receiverMsg.SetCompiledRegexp(regexp.MustCompile(receiverMsg.Source))
      b.ResetTimer()
      for i := 0; i < b.N; i++ {
          matchSink = TreeMatchDirect(receiverMsg, "hello, world")
      }
  }


  1. BenchmarkRegexpCast-20 2741786 376.7 ns/op 16 B/op 1 allocs/op
  2. BenchmarkRegexpDirect-20 3075280 377.0 ns/op 16 B/op 1 allocs/op
  3. PASS

Found a solution: you just need some pointer-typed field inside your struct (it does not matter whether it gets marshalled, because the receiver never uses its unmarshalled value):

proto declaration:

  1. syntax = "proto3";
  2. package main;
  3. option go_package = ".;main";
  4. message Empty {
  5. }
  6. message ProtoMessage {
  7. string data = 1;
  8. string source = 2;
  9. Empty regexp = 3; // ideally should not be marshalled at all, like `json:"-"` but for protobuf
  10. ProtoMessage left = 4;
  11. ProtoMessage right = 5;
  12. }

testing code:

  1. package main
  2. import (
  3. "regexp"
  4. "testing"
  5. "unsafe"
  6. )
  // Empty stands in for the protoc-generated type of the field-less `Empty`
  // message. The generated bookkeeping fields are left commented out, so the
  // type is zero-sized in this example.
  type Empty struct {
      //state protoimpl.MessageState
      //sizeCache protoimpl.SizeCache
      //unknownFields protoimpl.UnknownFields
  }

  // ProtoMessage stands in for the protoc-generated struct of one tree node.
  //
  // Source holds the pattern text. Regexp is declared as *Empty, but it is
  // reused through SetCompiledRegexp/GetCompiledRegexp to carry the
  // *regexp.Regexp compiled from Source on the receiving side.
  //
  // NOTE(review): while Regexp carries a *regexp.Regexp, the protobuf runtime
  // would presumably treat that memory as an Empty message, so the field should
  // be reset to nil before the message is passed to proto.Marshal. Verify this
  // against the real generated code.
  type ProtoMessage struct {
      //state protoimpl.MessageState
      //sizeCache protoimpl.SizeCache
      //unknownFields protoimpl.UnknownFields
      Data   string        `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"`
      Source string        `protobuf:"bytes,2,opt,name=source,proto3" json:"source,omitempty"`
      Regexp *Empty        `protobuf:"bytes,3,opt,name=regexp,proto3" json:"regexp,omitempty"` // ideally should not be marshalled at all, like `json:"-"` but for protobuf
      Left   *ProtoMessage `protobuf:"bytes,4,opt,name=left,proto3" json:"left,omitempty"`
      Right  *ProtoMessage `protobuf:"bytes,5,opt,name=right,proto3" json:"right,omitempty"`
  }

  // GetCompiledRegexp returns the pattern stored by SetCompiledRegexp, or nil
  // when p is nil or no pattern has been stored.
  func (p *ProtoMessage) GetCompiledRegexp() *regexp.Regexp {
      if p == nil {
          return nil
      }
      // Reinterpret the *Empty slot as the *regexp.Regexp that was stored in it.
      return (*regexp.Regexp)(unsafe.Pointer(p.Regexp))
  }

  // SetCompiledRegexp stores r in the Regexp field by reinterpreting the pointer
  // as *Empty. Passing nil clears the field.
  func (p *ProtoMessage) SetCompiledRegexp(r *regexp.Regexp) {
      p.Regexp = (*Empty)(unsafe.Pointer(r))
  }

  // TreeMatch walks tree along its Left links and returns the Data of the first
  // node whose compiled regexp matches line, or "" when no node matches.
  // A nil tree and nodes without a compiled regexp are skipped instead of
  // causing a nil-pointer panic. Right links are not visited.
  func TreeMatch(tree *ProtoMessage, line string) string {
      for node := tree; node != nil; node = node.Left {
          // MatchString avoids copying line into a new []byte on every call.
          if re := node.GetCompiledRegexp(); re != nil && re.MatchString(line) {
              return node.Data
          }
      }
      return ""
  }

  // TestTreeMatch checks that TreeMatch reports a match only when the pattern
  // matches, and that it descends into Left past a node that has no pattern.
  func TestTreeMatch(t *testing.T) {
      // Happening on the receiver side: imagine proto.Unmarshal(payload, receiverMsg).
      receiverMsg := &ProtoMessage{
          Data:   "its meee!!!",
          Source: "hello.+world",
      }
      // MustCompile makes a bad pattern fail loudly instead of leaving a nil regexp behind.
      receiverMsg.SetCompiledRegexp(regexp.MustCompile(receiverMsg.Source))
      if TreeMatch(receiverMsg, "helloworld") != "" {
          t.Fatalf("TreeMatch gives non-existing match!")
      }
      if TreeMatch(receiverMsg, "hello, world") != "its meee!!!" {
          t.Fatalf("TreeMatch is not working!")
      }
      parent := &ProtoMessage{Data: "parent", Left: receiverMsg}
      if TreeMatch(parent, "hello, world") != "its meee!!!" {
          t.Fatalf("TreeMatch does not descend into Left!")
      }
  }

  // ProtoMessageDirect is the baseline for the benchmark: the same node shape,
  // but holding the compiled pattern in a properly typed field.
  type ProtoMessageDirect struct {
      Data   string
      Source string
      Regexp *regexp.Regexp
      Left   *ProtoMessageDirect
      Right  *ProtoMessageDirect
  }

  // GetCompiledRegexp returns the stored pattern, or nil when p is nil or no
  // pattern has been stored.
  func (p *ProtoMessageDirect) GetCompiledRegexp() *regexp.Regexp {
      if p == nil {
          return nil
      }
      return p.Regexp
  }

  // SetCompiledRegexp stores r as the node's compiled pattern.
  func (p *ProtoMessageDirect) SetCompiledRegexp(r *regexp.Regexp) {
      p.Regexp = r
  }

  // TreeMatchDirect is TreeMatch for ProtoMessageDirect: it follows Left links
  // and returns the Data of the first node whose pattern matches line, or "".
  // A nil tree and nodes without a pattern are skipped.
  func TreeMatchDirect(tree *ProtoMessageDirect, line string) string {
      for node := tree; node != nil; node = node.Left {
          if re := node.GetCompiledRegexp(); re != nil && re.MatchString(line) {
              return node.Data
          }
      }
      return ""
  }

  // matchSink receives benchmark results so the calls cannot be optimized away.
  var matchSink string

  // BenchmarkRegexpCast measures TreeMatch on a node whose pattern is stored
  // through the unsafe pointer cast.
  func BenchmarkRegexpCast(b *testing.B) {
      receiverMsg := &ProtoMessage{
          Data:   "its meee!!!",
          Source: "hello.+world",
      }
      receiverMsg.SetCompiledRegexp(regexp.MustCompile(receiverMsg.Source))
      b.ResetTimer()
      for i := 0; i < b.N; i++ {
          matchSink = TreeMatch(receiverMsg, "hello, world")
      }
  }

  // BenchmarkRegexpDirect measures TreeMatchDirect on a node that stores the
  // pattern in a typed field, as the baseline for BenchmarkRegexpCast.
  func BenchmarkRegexpDirect(b *testing.B) {
      receiverMsg := &ProtoMessageDirect{
          Data:   "its meee!!!",
          Source: "hello.+world",
      }
      receiverMsg.SetCompiledRegexp(regexp.MustCompile(receiverMsg.Source))
      b.ResetTimer()
      for i := 0; i < b.N; i++ {
          matchSink = TreeMatchDirect(receiverMsg, "hello, world")
      }
  }

TestTreeMatch passes, and the benchmarks show that the cast makes no meaningful difference:

  1. BenchmarkRegexpCast-20 2741786 376.7 ns/op 16 B/op 1 allocs/op
  2. BenchmarkRegexpDirect-20 3075280 377.0 ns/op 16 B/op 1 allocs/op
  3. PASS

  • 本文由 发表于 2021年9月1日 01:32:21
  • 转载请务必保留本文链接:https://go.coder-hub.com/69003068.html



:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:
