8000 Accelerated DHT Client causes OOM kill upon start of IPFS, ResourceMgr.MaxMemory ignored · Issue #9990 · ipfs/kubo · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content
Accelerated DHT Client causes OOM kill upon start of IPFS, ResourceMgr.MaxMemory ignored #9990
Open
@dokterbob

Description

@dokterbob

Checklist

Installation method

third-party binary

Version

Kubo version: 0.20.0-b8c4725
Repo version: 13
System version: amd64/linux
Golang version: go1.19.1

Config

{
  "API": {
    "HTTPHeaders": {
      "Access-Control-Allow-Methods": [
        "PUT",
        "POST"
      ],
      "Access-Control-Allow-Origin": [
        "http://<private>:10125",
        "http://localhost:3000",
        "http://127.0.0.1:5001",
        "https://webui.ipfs.io"
      ]
    }
  },
  "Addresses": {
    "API": "/ip4/0.0.0.0/tcp/5001",
    "Announce": [],
    "AppendAnnounce": [
      "/dns4/<private>/tcp/4001",
      "/dns4/<private>/udp/4001/quic",
      "/dns4/<private>/udp/4001/quic-v1",
      "/dns4/<private>/udp/4001/quic-v1/webtransport",
      "/dns4/<private>/tcp/4001",
      "/dns4/<private>/udp/4001/quic",
      "/dns4/<private>/udp/4001/quic-v1",
      "/dns4/<private>/udp/4001/quic-v1/webtransport"
    ],
    "Gateway": "/ip4/0.0.0.0/tcp/8080",
    "NoAnnounce": [
      "/ip4/10.0.0.0/ipcidr/8",
      "/ip4/100.64.0.0/ipcidr/10",
      "/ip4/169.254.0.0/ipcidr/16",
      "/ip4/172.16.0.0/ipcidr/12",
      "/ip4/192.0.0.0/ipcidr/24",
      "/ip4/192.0.2.0/ipcidr/24",
      "/ip4/192.168.0.0/ipcidr/16",
      "/ip4/198.18.0.0/ipcidr/15",
      "/ip4/198.51.100.0/ipcidr/24",
      "/ip4/203.0.113.0/ipcidr/24",
      "/ip4/240.0.0.0/ipcidr/4",
      "/ip6/100::/ipcidr/64",
      "/ip6/2001:2::/ipcidr/48",
      "/ip6/2001:db8::/ipcidr/32",
      "/ip6/fc00::/ipcidr/7",
      "/ip6/fe80::/ipcidr/10"
    ],
    "Swarm": [
      "/ip4/0.0.0.0/tcp/4001",
      "/ip6/::/tcp/4001",
      "/ip4/0.0.0.0/udp/4001/quic",
      "/ip4/0.0.0.0/udp/4001/quic-v1",
      "/ip4/0.0.0.0/udp/4001/quic-v1/webtransport",
      "/ip6/::/udp/4001/quic",
      "/ip6/::/udp/4001/quic-v1",
      "/ip6/::/udp/4001/quic-v1/webtransport"
    ]
  },
  "AutoNAT": {},
  "Bootstrap": [
    "/dnsaddr/bootstrap.libp2p.io/p2p/QmNnooDu7bfjPFoTZYxMNLWUQJyrVwtbZg5gBMjTezGAJN",
    "/dnsaddr/bootstrap.libp2p.io/p2p/QmQCU2EcMqAqQPR2i9bChDtGNJchTbq5TbXJJ16u19uLTa",
    "/dnsaddr/bootstrap.libp2p.io/p2p/QmbLHAnMoJPWSCR5Zhtx6BHJX9KiKNN6tpvbUcqanj75Nb",
    "/dnsaddr/bootstrap.libp2p.io/p2p/QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt",
    "/ip4/104.131.131.82/tcp/4001/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ",
    "/ip4/104.131.131.82/udp/4001/quic/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ"
  ],
  "DNS": {
    "Resolvers": {}
  },
  "Datastore": {
    "BloomFilterSize": 1048576,
    "GCPeriod": "1h",
    "HashOnRead": false,
    "Spec": {
      "mounts": [
      
8000
  {
          "child": {
            "path": "blocks",
            "shardFunc": "/repo/flatfs/shard/v1/next-to-last/3",
            "sync": false,
            "type": "flatfs"
          },
          "mountpoint": "/blocks",
          "prefix": "flatfs.datastore",
          "type": "measure"
        },
        {
          "child": {
            "compression": "none",
            "path": "datastore",
            "type": "levelds"
          },
          "mountpoint": "/",
          "prefix": "leveldb.datastore",
          "type": "measure"
        }
      ],
      "type": "mount"
    },
    "StorageGCWatermark": 90,
    "StorageMax": "100GB"
  },
  "Discovery": {
    "MDNS": {
      "Enabled": false
    }
  },
  "Experimental": {
    "AcceleratedDHTClient": true,
    "FilestoreEnabled": true,
    "GraphsyncEnabled": false,
    "Libp2pStreamMounting": false,
    "OptimisticProvide": false,
    "OptimisticProvideJobsPoolSize": 0,
    "P2pHttpProxy": false,
    "StrategicProviding": false,
    "UrlstoreEnabled": false
  },
  "Gateway": {
    "APICommands": [],
    "HTTPHeaders": {
      "Access-Control-Allow-Headers": [
        "X-Requested-With",
        "Range",
        "User-Agent"
      ],
      "Access-Control-Allow-Methods": [
        "GET"
      ],
      "Access-Control-Allow-Origin": [
        "*"
      ]
    },
    "NoDNSLink": false,
    "NoFetch": false,
    "PathPrefixes": [],
    "PublicGateways": null,
    "RootRedirect": ""
  },
  "Identity": {
    "PeerID": "<private>",
    "PrivKey": "<definitely private>"
  },
  "Internal": {},
  "Ipns": {
    "RecordLifetime": "",
    "RepublishPeriod": "",
    "ResolveCacheSize": 128
  },
  "Migration": {
    "DownloadSources": [],
    "Keep": ""
  },
  "Mounts": {
    "FuseAllowOther": false,
    "IPFS": "/ipfs",
    "IPNS": "/ipns"
  },
  "Peering": {
    "Peers": null
  },
  "Pinning": {
    "RemoteServices": {}
  },
  "Plugins": {
    "Plugins": null
  },
  "Provider": {
    "Strategy": ""
  },
  "Pubsub": {
    "DisableSigning": false,
    "Router": ""
  },
  "Reprovider": {},
  "Routing": {
    "Methods": null,
    "Routers": null
  },
  "Swarm": {
    "AddrFilters": [
      "/ip4/10.0.0.0/ipcidr/8",
      "/ip4/100.64.0.0/ipcidr/10",
      "/ip4/169.254.0.0/ipcidr/16",
      "/ip4/172.16.0.0/ipcidr/12",
      "/ip4/192.0.0.0/ipcidr/24",
      "/ip4/192.0.2.0/ipcidr/24",
      "/ip4/198.18.0.0/ipcidr/15",
      "/ip4/198.51.100.0/ipcidr/24",
      "/ip4/203.0.113.0/ipcidr/24",
      "/ip4/240.0.0.0/ipcidr/4",
      "/ip6/100::/ipcidr/64",
      "/ip6/2001:2::/ipcidr/48",
      "/ip6/2001:db8::/ipcidr/32",
      "/ip6/fc00::/ipcidr/7",
      "/ip6/fe80::/ipcidr/10"
    ],
    "ConnMgr": {},
    "DisableBandwidthMetrics": false,
    "DisableNatPortMap": true,
    "RelayClient": {},
    "RelayService": {},
    "ResourceMgr": {
      "Limits": {},
      "MaxMemory": "1GB"
    },
    "Transports": {
      "Multiplexers": {},
      "Network": {},
      "Security": {}
    }
  }
}

Description

Context

Starting kubo

  • Within a container with memory limits of 2, 3 or 4 GB.
  • With ResourceMgr.MaxMemory set to 1 or 2 GB.
  • With ports forwarded on 100/100 fibre line.
  • With about 10 TB of 1 MB blocks added through filestore (so ~10M blocks).

Consistently results in a OOM kill within minutes of startup.

Observations

When the node is started;

  1. It establishes ~2k peer connections (as, arguably, it should).
  2. 6-9 MB/s of DHT traffic is flowing in, with 200-400 KB/s going out.
  3. Within minutes (less than 15m, more than 1m) it consistently swallows all available memory and gets killed.

With AcceleratedDHTClient set to false, this problem does not occur. Hence, it seems that the Accelerated DHT client does not adhere to the resource manager's memory limit. Perhaps the defaults just need to be made accelerated DHT aware.

Might want to fix this before taking it out of the experimental phase. ;)

Possibly related/relevant

Metadata

Metadata

Assignees

No one assigned

    Labels

    P1High: Likely tackled by core team if no one steps upkind/bugA bug in existing code (including security flaws)status/blockedUnable to be worked further until needs are met

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions

      0