
huangapple go评论125阅读模式

How to put regexp inside proto struct in golang?




// 由protoc生成的结构体
type ProtoMessage struct {
	Data   string
	Source string
	Regexp uint64 // 序列化时不应包含此字段,希望在调用proto.Marshal时能够强制忽略它,ideally,它应该是*regexp.Regexp类型
	Left   *ProtoMessage
	Right  *ProtoMessage

func main() {
	// 发送方计算机执行doSend():
	mSrc := &ProtoMessage{Data: "its meee!!!", Source: "hello.+world"}
	payload, _ := proto.Marshal(m)

	// 接收方计算机执行onRecv():
	mDst := new(ProtoMessage)
	proto.Unmarshal(payload, mDst)
	r, _ := regexp.Compile(mDst.Source)
	mDst.Regexp = uint64(unsafe.Pointer(r)) // 顺便说一下,这个方法不起作用

	TreeMatch = func(tree *ProtoMessage, line string) string {
		if (*regexp.Regexp)(t.Regexp).Match(line) { // 这一行不起作用
			return t.Data
		if tree.Left == nil {
			return ""
		return TreeMatch(tree.Left, line)

	assert(TreeMatch(mDst, "hello, world") == "its meee!!!") // 如果条件为假,则会引发错误



I have a tree-like structure in which each node holds a regexp as a string, and I want the compiled Go *regexp.Regexp to be part of the node as well, so that I can run algorithms on the tree. When I marshal the tree and pass it to a different machine, I can simply recompile the regexp from the string. What is the correct way to do that? How can I make protobuf keep a pointer in the struct that, ideally, it will not marshal? (The only way I can see is to add a uint64 field and cast its value to/from *Regexp.)

Pseudo-code (because the features I need do not seem to exist in the language):

// struct generated by protoc
type ProtoMessage struct {
	Data string
	Source string
	Regexp uint64 // should not be marshalled, should be forcefully omitted from payload when doing proto.Marshal, ideally it should be *regexp.Regexp
	Left   *ProtoMessage
	Right  *ProtoMessage

func main() {
	// sender computer doSend():
	mSrc := &ProtoMessage{Data:"its meee!!!", Source: "hello.+world"}
	payload, _ := proto.Marshal(m)

	//receiver computer: onRecv()
	mDst := new(ProtoMessage)
	proto.Unmarshal(payload, mDst)
	r, _ := regexp.Compile(mDst.Source)
	mDst.Regexp = uint64(unsafe.Pointer(r)) // not working btw

	TreeMatch = func(tree* ProtoMessage, line string) string {
		if *regexp.Regexp(t.Regexp).Match(line) { // not working line
			return t.Data
		if tree.Left == nil {
			return ""
		return TreeMatch(tree.Left, line)

	assert( TreeMatch(mDst, "hello, world") == "its meee!!!") // panic if condition is false

With JSON marshalling I can just put a pointer to the regexp in the struct and give it the tag `json:"-"` so that the field is not included in the marshalled output. Of course, it is an important feature of a marshalling/unmarshalling system that it stays efficient (e.g. the same structure can be used to run algorithms on, avoiding data copying after unmarshalling). How can I do the same with protobuf?


得分: 2


package main

import (

func main() {
   v := structpb.NewStringValue("hello.+world")
   b, err := proto.Marshal(v)
   if err != nil {
   fmt.Printf("%q\n", b) // "\x1a\fhello.+world"


package main

import (

func main() {
   re := regexp.MustCompile("hello.+world")
   buf := new(bytes.Buffer)
   if err := gob.NewEncoder(buf).Encode(re); err != nil {
      panic(err) // type regexp.Regexp has no exported fields

You can't store a pointer in a protobuf, as the recipient is likely a different computer. Even if you could, you'd get a panic as soon as you tried to dereference the pointer. The easiest thing to do would be to just pass the regexp string, then compile it again at the destination:

package main

import (

func main() {
   v := structpb.NewStringValue("hello.+world")
   b, err := proto.Marshal(v)
   if err != nil {
   fmt.Printf("%q\n", b) // "\x1a\fhello.+world"

Note: you can't hack around this with Gob either:

package main

import (

func main() {
   re := regexp.MustCompile("hello.+world")
   buf := new(bytes.Buffer)
   if err := gob.NewEncoder(buf).Encode(re); err != nil {
      panic(err) // type regexp.Regexp has no exported fields


得分: 1



syntax = "proto3";
package main;
option go_package = ".;main";

message Empty {

message ProtoMessage {
    string data    = 1;
    string source = 2;
    Empty regexp = 3; // ideally should not be marshalled at all, like `json:"-"` but for protobuf
    ProtoMessage left = 4;
    ProtoMessage right = 5;


package main

import (

type Empty struct {
    //state         protoimpl.MessageState
    //sizeCache     protoimpl.SizeCache
    //unknownFields protoimpl.UnknownFields

// struct generated by protoc
type ProtoMessage struct {
    //state         protoimpl.MessageState
    //sizeCache     protoimpl.SizeCache
    //unknownFields protoimpl.UnknownFields

    Data   string        `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"`
    Source string        `protobuf:"bytes,2,opt,name=source,proto3" json:"source,omitempty"`
    Regexp *Empty        `protobuf:"bytes,3,opt,name=regexp,proto3" json:"regexp,omitempty"` // ideally should not be marshalled at all, like `json:"-"` but for protobuf
    Left   *ProtoMessage `protobuf:"bytes,4,opt,name=left,proto3" json:"left,omitempty"`
    Right  *ProtoMessage `protobuf:"bytes,5,opt,name=right,proto3" json:"right,omitempty"`

func (p *ProtoMessage) GetCompiledRegexp() *regexp.Regexp {
    return (*regexp.Regexp)(unsafe.Pointer(p.Regexp))

func (p *ProtoMessage) SetCompiledRegexp(r *regexp.Regexp) {
    p.Regexp = (*Empty)(unsafe.Pointer(r))

func TreeMatch(tree *ProtoMessage, line string) string {
    if tree.GetCompiledRegexp().Match([]byte(line)) { // not working line
        return tree.Data
    if tree.Left == nil {
        return ""
    return TreeMatch(tree.Left, line)

func TestTreeMatch(t *testing.T) {
    //happening at receiver side: imagine its proto.Unmarshal(payload, receiverMsg)
    receiverMsg := &ProtoMessage{
        Data:   "its meee!!!",
        Source: "hello.+world",
    r, _ := regexp.Compile(receiverMsg.Source)
    if TreeMatch(receiverMsg, "helloworld") != "" {
        t.Fatalf("TreeMatch gives non-existing match!")
    if TreeMatch(receiverMsg, "hello, world") != "its meee!!!" {
        t.Fatalf("TreeMatch is not working!")

type ProtoMessageDirect struct {
    Data   string
    Source string
    Regexp *regexp.Regexp
    Left   *ProtoMessageDirect
    Right  *ProtoMessageDirect

func (p *ProtoMessageDirect) GetCompiledRegexp() *regexp.Regexp {
    return p.Regexp

func (p *ProtoMessageDirect) SetCompiledRegexp(r *regexp.Regexp) {
    p.Regexp = r

func TreeMatchDirect(tree *ProtoMessageDirect, line string) string {
    if tree.GetCompiledRegexp().Match([]byte(line)) { // not working line
        return tree.Data
    if tree.Left == nil {
        return ""
    return TreeMatchDirect(tree.Left, line)

func BenchmarkRegexpCast(b *testing.B) {
    receiverMsg := &ProtoMessage{
        Data:   "its meee!!!",
        Source: "hello.+world",
    r, _ := regexp.Compile(receiverMsg.Source)
    for i := 0; i < b.N; i++ {
        TreeMatch(receiverMsg, "hello, world")

func BenchmarkRegexpDirect(b *testing.B) {
    receiverMsg := &ProtoMessageDirect{
        Data:   "its meee!!!",
        Source: "hello.+world",
    r, _ := regexp.Compile(receiverMsg.Source)
    for i := 0; i < b.N; i++ {
        TreeMatchDirect(receiverMsg, "hello, world")


BenchmarkRegexpCast-20      	 2741786	       376.7 ns/op	      16 B/op	       1 allocs/op
BenchmarkRegexpDirect-20    	 3075280	       377.0 ns/op	      16 B/op	       1 allocs/op

Found the solution: you just need to have some pointer field inside your struct (it does not matter whether it gets marshalled or not, since you do not use its unmarshalled value on the receiver side):

proto declaration:

syntax = "proto3";
package main;
option go_package = ".;main";
message Empty {
message ProtoMessage {
string data    = 1;
string source = 2;
Empty regexp = 3; // ideally should not be marshalled at all, like `json:"-"` but for protobuf
ProtoMessage left = 4;
ProtoMessage right = 5;

testing code:

package main
import (
type Empty struct {
//state         protoimpl.MessageState
//sizeCache     protoimpl.SizeCache
//unknownFields protoimpl.UnknownFields
// struct generated by protoc
type ProtoMessage struct {
//state         protoimpl.MessageState
//sizeCache     protoimpl.SizeCache
//unknownFields protoimpl.UnknownFields
Data   string        `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"`
Source string        `protobuf:"bytes,2,opt,name=source,proto3" json:"source,omitempty"`
Regexp *Empty        `protobuf:"bytes,3,opt,name=regexp,proto3" json:"regexp,omitempty"` // ideally should not be marshalled at all, like `json:"-"` but for protobuf
Left   *ProtoMessage `protobuf:"bytes,4,opt,name=left,proto3" json:"left,omitempty"`
Right  *ProtoMessage `protobuf:"bytes,5,opt,name=right,proto3" json:"right,omitempty"`
func (p *ProtoMessage) GetCompiledRegexp() *regexp.Regexp {
return (*regexp.Regexp)(unsafe.Pointer(p.Regexp))
func (p *ProtoMessage) SetCompiledRegexp(r *regexp.Regexp) {
p.Regexp = (*Empty)(unsafe.Pointer(r))
func TreeMatch(tree *ProtoMessage, line string) string {
if tree.GetCompiledRegexp().Match([]byte(line)) { // not working line
return tree.Data
if tree.Left == nil {
return &quot;&quot;
return TreeMatch(tree.Left, line)
func TestTreeMatch(t *testing.T) {
//happening at receiver side: imagine its proto.Unmarshal(payload, receiverMsg)
receiverMsg := &amp;ProtoMessage{
Data:   &quot;its meee!!!&quot;,
Source: &quot;hello.+world&quot;,
r, _ := regexp.Compile(receiverMsg.Source)
if TreeMatch(receiverMsg, &quot;helloworld&quot;) != &quot;&quot; {
t.Fatalf(&quot;TreeMatch gives non-existing match!&quot;)
if TreeMatch(receiverMsg, &quot;hello, world&quot;) != &quot;its meee!!!&quot; {
t.Fatalf(&quot;TreeMatch is not working!&quot;)
type ProtoMessageDirect struct {
Data   string
Source string
Regexp *regexp.Regexp
Left   *ProtoMessageDirect
Right  *ProtoMessageDirect
func (p *ProtoMessageDirect) GetCompiledRegexp() *regexp.Regexp {
return p.Regexp
func (p *ProtoMessageDirect) SetCompiledRegexp(r *regexp.Regexp) {
p.Regexp = r
func TreeMatchDirect(tree *ProtoMessageDirect, line string) string {
if tree.GetCompiledRegexp().Match([]byte(line)) { // not working line
return tree.Data
if tree.Left == nil {
return &quot;&quot;
return TreeMatchDirect(tree.Left, line)
func BenchmarkRegexpCast(b *testing.B) {
receiverMsg := &amp;ProtoMessage{
Data:   &quot;its meee!!!&quot;,
Source: &quot;hello.+world&quot;,
r, _ := regexp.Compile(receiverMsg.Source)
for i := 0; i &lt; b.N; i++ {
TreeMatch(receiverMsg, &quot;hello, world&quot;)
func BenchmarkRegexpDirect(b *testing.B) {
receiverMsg := &amp;ProtoMessageDirect{
Data:   &quot;its meee!!!&quot;,
Source: &quot;hello.+world&quot;,
r, _ := regexp.Compile(receiverMsg.Source)
for i := 0; i &lt; b.N; i++ {
TreeMatchDirect(receiverMsg, &quot;hello, world&quot;)

TestTreeMatch passes, and the benchmarks show that such a cast makes no meaningful difference:

BenchmarkRegexpCast-20      	 2741786	       376.7 ns/op	      16 B/op	       1 allocs/op
BenchmarkRegexpDirect-20    	 3075280	       377.0 ns/op	      16 B/op	       1 allocs/op

  • 本文由 发表于 2021年9月1日 01:32:21
  • 转载请务必保留本文链接:https://go.coder-hub.com/69003068.html



:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:
