1
0
mirror of https://github.com/moby/moby.git synced 2025-04-18 20:44:11 +03:00

Store an endpoint count for networks, for downgrade

Since commit 51d7f95 ("libnet: remove struct endpointCnt") an
endpoint count for networks has not been persisted.

But, on downgrade to a version older than that commit, the
missing field caused daemon startup to fail.

So, create the count in the store - it only needs to exist, it's
no longer maintained as a count of endpoints. On downgrade, the
count is probably zero anyway (the daemon is stopped), but the
older daemon fixes it up on startup if necessary.

Signed-off-by: Rob Murray <rob.murray@docker.com>
This commit is contained in:
Rob Murray 2025-04-15 16:20:16 +01:00
parent b2d06baf9b
commit 380ded6309
3 changed files with 138 additions and 0 deletions

View File

@ -688,6 +688,28 @@ func (c *Controller) NewNetwork(ctx context.Context, networkType, name string, i
}
addToStore:
// First store the endpoint count, then the network, to avoid ending
// up with a datastore containing a network but no epCnt in case of
// an ungraceful shutdown during this function call.
//
// TODO(robmry) - remove this once downgrade past 28.1.0 is no longer supported.
// The endpoint count is no longer used, it's created in the store to make
// downgrade work, versions older than 28.1.0 expect to read it and error if they
// can't. The stored count is not maintained, so the downgraded version will
// always find it's zero (which is usually correct because the daemon had
// stopped), but older daemons fix it on startup anyway.
epCnt := &endpointCnt{n: nw}
if err := c.updateToStore(ctx, epCnt); err != nil {
return nil, err
}
defer func() {
if retErr != nil {
if err := c.deleteFromStore(epCnt); err != nil {
log.G(ctx).Warnf("could not rollback from store, epCnt %v on failure (%v): %v", epCnt, retErr, err)
}
}
}()
if err := c.storeNetwork(ctx, nw); err != nil {
return nil, err
}

102
libnetwork/endpoint_cnt.go Normal file
View File

@ -0,0 +1,102 @@
package libnetwork
import (
"encoding/json"
"sync"
"github.com/docker/docker/libnetwork/datastore"
)
// endpointCnt is the legacy refcount of network-endpoint relationships.
// Nothing has used it since v28.1; it is retained only so that users can
// properly downgrade.
//
// TODO(aker): remove this struct in v30.
type endpointCnt struct {
	// Mutex is taken by every method of this type before it touches the
	// fields below.
	sync.Mutex

	// n is the network this count belongs to; its id forms part of the
	// datastore key.
	n *Network

	// Count is the only exported field, and so the only one that
	// encoding/json writes to or reads from the stored value.
	Count uint64

	// dbIndex is the index most recently passed to SetIndex.
	dbIndex uint64

	// dbExists becomes true once SetIndex has been called.
	dbExists bool
}
// epCntKeyPrefix is the first element of the datastore key (and key prefix)
// returned for an endpointCnt.
const epCntKeyPrefix = "endpoint_count"
// Key returns the datastore key of this endpoint count: epCntKeyPrefix
// followed by the id of the network it belongs to.
func (ec *endpointCnt) Key() []string {
	ec.Lock()
	defer ec.Unlock()

	networkID := ec.n.id
	return []string{epCntKeyPrefix, networkID}
}
// KeyPrefix returns the datastore key prefix of this endpoint count. It has
// the same two elements as the full key: epCntKeyPrefix and the network id.
func (ec *endpointCnt) KeyPrefix() []string {
	ec.Lock()
	defer ec.Unlock()

	prefix := []string{epCntKeyPrefix, ec.n.id}
	return prefix
}
// Value returns the JSON encoding of the endpoint count, or nil if it cannot
// be marshalled.
func (ec *endpointCnt) Value() []byte {
	ec.Lock()
	defer ec.Unlock()

	if encoded, err := json.Marshal(ec); err == nil {
		return encoded
	}
	return nil
}
// SetValue decodes the JSON in value into the receiver and returns any
// decoding error.
func (ec *endpointCnt) SetValue(value []byte) error {
	ec.Lock()
	defer ec.Unlock()

	// Decode straight into the struct the receiver points at. Passing &ec
	// would hand the decoder a pointer to the local receiver variable,
	// which it is free to re-point (e.g. to nil for a JSON null).
	return json.Unmarshal(value, ec)
}
// Index returns the index last recorded by SetIndex, or zero if none has
// been recorded.
func (ec *endpointCnt) Index() uint64 {
	ec.Lock()
	defer ec.Unlock()

	idx := ec.dbIndex
	return idx
}
// SetIndex records index and marks the endpoint count as existing in the
// store.
func (ec *endpointCnt) SetIndex(index uint64) {
	ec.Lock()
	defer ec.Unlock()

	ec.dbIndex, ec.dbExists = index, true
}
// Exists reports whether SetIndex has been called on this endpoint count.
func (ec *endpointCnt) Exists() bool {
	ec.Lock()
	defer ec.Unlock()

	stored := ec.dbExists
	return stored
}
// Skip reports true when the owning network's persist flag is unset.
func (ec *endpointCnt) Skip() bool {
	ec.Lock()
	defer ec.Unlock()

	persisted := ec.n.persist
	return !persisted
}
// New returns a fresh endpointCnt that refers to the same network as the
// receiver; all of its other fields are left at their zero values.
func (ec *endpointCnt) New() datastore.KVObject {
	ec.Lock()
	defer ec.Unlock()

	fresh := new(endpointCnt)
	fresh.n = ec.n
	return fresh
}
// CopyTo copies the receiver's network reference, count and store state into
// o, which must be an *endpointCnt (any other type makes the assertion
// panic). Only the receiver's lock is held while copying. It always returns
// nil.
func (ec *endpointCnt) CopyTo(o datastore.KVObject) error {
	ec.Lock()
	defer ec.Unlock()

	target := o.(*endpointCnt)
	target.n, target.Count = ec.n, ec.Count
	target.dbExists, target.dbIndex = ec.dbExists, ec.dbIndex
	return nil
}

View File

@ -1113,6 +1113,20 @@ func (n *Network) delete(force bool, rmLBEndpoint bool) error {
}
removeFromStore:
// deleteFromStore performs an atomic delete operation and the
// Network.epCnt will help prevent any possible
// race between endpoint join and network delete
//
// TODO(robmry) - remove this once downgrade past 28.1.0 is no longer supported.
// The endpoint count is no longer used, it's created in the store to make
// downgrade work, versions older than 28.1.0 expect to read it and error if they
// can't. The stored count is not maintained, so the downgraded version will
// always find it's zero (which is usually correct because the daemon had
// stopped), but older daemons fix it on startup anyway.
if err = c.deleteFromStore(&endpointCnt{n: n}); err != nil {
log.G(context.TODO()).Debugf("Error deleting endpoint count from store for stale network %s (%s) for deletion: %v", n.Name(), n.ID(), err)
}
if err = c.deleteStoredNetwork(n); err != nil {
return fmt.Errorf("error deleting network from store: %v", err)
}