8000 snmp plugin memory leak · Issue #610 · collectd/collectd · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content
snmp plugin memory leak #610
Closed
Closed
@StianOvrevage

Description

@StianOvrevage

I have a problem where the collectd snmp plugin seems to leak about 50MB of memory per hour on our setup.

valgrind output: https://gist.github.com/StianOvrevage/9f0a48519a1f7f49c685
valgrind output (full, with show-reachable): https://gist.github.com/StianOvrevage/e84245d2aea1f723ce69
valgrind massif output: https://gist.github.com/StianOvrevage/62b28d7f007630367465

There is a bug report in net-snmp which suggests that it is possible to use the snmp library incorrectly, specifically the snmp_pdu_create function, and get memory leaks that way: http://sourceforge.net/p/net-snmp/bugs/1095/

collectd.conf:

Hostname    "collectd-instance-01-kb-infrastructure"
PIDFile     "/var/run/collectd-instance-01-kb-infrastructure.pid"
TypesDB     "/opt/collectd/share/collectd/types.db" "/opt/collectd/etc/types.custom.db"

Interval     10
Timeout       2
ReadThreads  50

Include "/opt/collectd/etc/instance-01-kb-infrastructure/snmp.conf"

#WriteQueueLimitHigh 1000000
#WriteQueueLimitLow   800000

LoadPlugin logfile
<Plugin logfile>
       LogLevel info
       File "/var/log/collectd-instance-01-kb-infrastructure.log"
       Timestamp true
       PrintSeverity true
</Plugin>

#LoadPlugin write_http
#<Plugin "write_http">
#  <URL "http://localhost:8888/collectd/data">
#    Format "JSON"
    #User "collectd"
    #Password "weCh3ik0"
#  </URL>
#</Plugin>

snmp.conf:
We have 100+ hosts in snmp.conf. Only a few are included here; the rest have mostly identical configuration.

LoadPlugin snmp
<Plugin snmp>

  <Data "BarracudaSpamFW">
    Type "BarracudaSpamFW"
    Table false
    Values "Barracuda-SPAM::systemLoad" "Barracuda-SPAM::domainCount" "Barracuda-SPAM::cpuTemperature" "Barracuda-SPAM::avgEmailLatency" "Barracuda-SPAM::inQueueSize" "Barracuda-SPAM::outQueueSize" "Barracuda-SPAM::deferredQueueSize" "Barracuda-SPAM::notifyQueueSize" "Barracuda-SPAM::totalInboundBlocked" "Barracuda-SPAM::totalInboundVirusBlocked" "Barracuda-SPAM::totalInboundRateControlled" "Barracuda-SPAM::totalInboundQuarantined" "Barracuda-SPAM::totalInboundTagged" "Barracuda-SPAM::totalOutboundPolicyBlocked" "Barracuda-SPAM::totalOutboundSpamBlocked" "Barracuda-SPAM::totalOutboundVirusBlocked" "Barracuda-SPAM::totalOutboundRateControlled" "Barracuda-SPAM::totalOutboundQuarantined" "Barracuda-SPAM::totalAllowed" "Barracuda-SPAM::totalEncrypted" "Barracuda-SPAM::totalRedirected" "Barracuda-SPAM::totalSent"
  </Data>

  <Data "ups_powerware">
    Type "ups_powerware"
    Table false
    #Instance "IF-MIB::ifDescr"
    Values "UPS-MIB::upsSecondsOnBattery.0" "UPS-MIB::upsEstimatedMinutesRemaining.0" "UPS-MIB::upsEstimatedChargeRemaining.0" "UPS-MIB::upsBatteryVoltage.0" "UPS-MIB::upsInputLineBads.0" "UPS-MIB::upsInputNumLines.0" "UPS-MIB::upsInputFrequency.1" "UPS-MIB::upsInputVoltage.1" "UPS-MIB::upsInputCurrent.1" "UPS-MIB::upsOutputFrequency.0" "UPS-MIB::upsOutputNumLines.0" "UPS-MIB::upsOutputVoltage.1" "UPS-MIB::upsOutputCurrent.1" "UPS-MIB::upsOutputPower.1" "UPS-MIB::upsOutputPercentLoad.1"
  </Data>

  <Data "routes">
    Type "routes"
    Table true
    Values "IP-FORWARD-MIB::ipCidrRouteNumber" "IP-FORWARD-MIB::inetCidrRouteNumber" "IPMROUTE-STD-MIB::ipMRouteEntryCount"
  </Data>

  <Data "if_stats">
    Type "if_stats"
    Table true
    Instance "IF-MIB::ifDescr"
    Values "IF-MIB::ifHCInOctets" "IF-MIB::ifHCOutOctets" "IF-MIB::ifHCInUcastPkts" "IF-MIB::ifHCInMulticastPkts" "IF-MIB::ifHCInBroadcastPkts" "IF-MIB::ifHCOutUcastPkts" "IF-MIB::ifHCOutMulticastPkts" "IF-MIB::ifHCOutBroadcastPkts" "IF-MIB::ifInDiscards" "IF-MIB::ifInErrors" "IF-MIB::ifOutDiscards" "IF-MIB::ifOutErrors"
  </Data>

  <Data "sensors">
    Type "sensors"
    Table true
    Instance "ENTITY-MIB::entPhysicalDescr"
    Values "CISCO-ENTITY-SENSOR-MIB::entSensorValue" "CISCO-ENTITY-SENSOR-MIB::entSensorThresholdValue"
  </Data>

  <Data "cisco_cpu">
    Type "cisco_cpu"
    Table true
    Values "CISCO-PROCESS-MIB::cpmCPUTotal5secRev" "CISCO-PROCESS-MIB::cpmCPUTotal1minRev" "CISCO-PROCESS-MIB::cpmCPUTotal5minRev"
  </Data>

  <Data "uptime">
    Type "uptime"
    Table false
    #Instance ""
    Values "DISMAN-EVENT-MIB::sysUpTimeInstance"
  </Data>



  <Host "kbspamfw01">
    Address ""
    Version 2
    Community ""
    Collect "BarracudaSpamFW"
    Interval 20
  </Host>

  <Host "sw01">
    Address ""
    Version 2
    Community ""
    Collect "cisco_cpu" "uptime" "sensors" "if_stats"
    Interval 20
  </Host>

  <Host "sw02">
    Address ""
    Version 2
    Community ""
    Collect "cisco_cpu" "uptime" "sensors" "if_stats" "routes"
    Interval 20
  </Host>

  <Host "sw03">
    Address ""
    Version 2
    Community ""
    Collect "cisco_cpu" "uptime" "sensors" "if_stats"
    Interval 20
  </Host>

</Plugin>

types.custom.db:

if_stats                ifHCInOctets:COUNTER:0:U, ifHCOutOctets:COUNTER:0:U, ifHCInUcastPkts:COUNTER:0:U, ifHCInMulticastPkts:COUNTER:0:U, ifHCInBroadcastPkts:COUNTER:0:U, ifHCOutUcastPkts:COUNTER:0:U, ifHCOutMulticastPkts:COUNTER:0:U, ifHCOutBroadcastPkts:COUNTER:0:U, ifInDiscards:COUNTER:0:U, ifInErrors:COUNTER:0:U, ifOutDiscards:COUNTER:0:U, ifOutErrors:COUNTER:0:U

if_octets_hc     ifHCInOctets:COUNTER:0:U, ifHCOutOctets:COUNTER:0:U

if_packets_hc                   ifHCInUcastPkts:COUNTER:0:U, ifHCInMcastPkts:COUNTER:0:U, ifHCInBcastPkts:COUNTER:0:U, ifHCOutUcastPkts:COUNTER:0:U, ifHCOutMcastPkts:COUNTER:0:U, ifHCOutBcastPkts:COUNTER:0:U

if_drop_discard_err_que         ifInDiscards:COUNTER:0:U, ifInErrors:COUNTER:0:U, ifOutDiscards:COUNTER:0:U, ifOutErrors:COUNTER:0:U

if_octets                    ifInOctets:COUNTER:0:U, ifOutOctets:COUNTER:0:U

if_rgpackets                   ifInUcastPkts:COUNTER:0:U, ifInNUcastPkts:COUNTER:0:U, ifOutUcastPkts:COUNTER:0:U, ifOutNUcastPkts:COUNTER:0:U

sensors                      sensorValue:GAUGE:U:U, sensorThreshold:GAUGE:U:U

uptime                  uptime:GAUGE:U:U

cisco_cpu       cpu5sec:GAUGE:0:100, cpu1min:GAUGE:0:100, cpu5min:GAUGE:0:100

routes  ipv4routes:GAUGE:0:U, ipv6routes:GAUGE:0:U, mcastroutes:GAUGE:0:U

ups_powerware           SecondsOnBattery:GAUGE:0:U,   EstMinuteRem:GAUGE:0:U,   EstChargeRem:GAUGE:0:U,   BatteryVoltage:GAUGE:0:U,   InputLineBads:COUNTER:0:U,   InputNumLines:GAUGE:0:U,  InputFrequency:GAUGE:0:U,   InputVoltage:GAUGE:0:U,   InputCurrent:GAUGE:0:U,  OutputFrequency:GAUGE:0:U,   OutputNumLines:GAUGE:0:U,   OutputVoltage:GAUGE:0:U,   OutputCurrent:GAUGE:0:U,   OutputPower:GAUGE:0:U,   OutputPctLoad:GAUGE:0:U

ups_powerware_v2  Uptime:GAUGE:0:U, BatteryStatus:GAUGE:0:U, SecondsOnBattery:GAUGE:0:U, EstMinuteRem:GAUGE:0:U, EstChargeRem:GAUGE:0:U, BatteryVoltage:GAUGE:0:U, BatteryCurrent:GAUGE:0:U, InputLineBads:COUNTER:0:U, InputFrequency:GAUGE:0:U, InputVoltage:GAUGE:0:U, InputCurrent:GAUGE:0:U, OutputSource:GAUGE:0:U, OutputFrequency:GAUGE:0:U, OutputVoltage:GAUGE:0:U, OutputCurrent:GAUGE:0:U, OutputPower:GAUGE:0:U, OutputPctLoad:GAUGE:0:U, Alarms:GAUGE:0:U

ELTEK_48VPLANT loadDistCurrent:GAUGE:0:50000, acVoltage1:GAUGE:0:50000, acVoltage2:GAUGE:0:50000, acVoltage3:GAUGE:0:50000, batVoltage:GAUGE:0:50000, batCurrent:GAUGE:0:50000, batTemp:GAUGE:-50:150, batTimeToDiscon:GAUGE:0:44640, batCapLeft:GAUGE:0:44640, batCapUsed:GAUGE:0:44640, batCapTotal:GAUGE:0:44640, batQuality:GAUGE:0:100, batFloatVoltConf:GAUGE:0:50000, batBoostVoltConf:GAUGE:0:50000, batHiMajAlmVltCnf:GAUGE:0:50000, batHiMinAlmVltCnf:GAUGE:0:50000, batLoMajAlmVltCnf:GAUGE:0:50000, batLoMinAlmVltCnf:GAUGE:0:50000, rectTotalCurrent:GAUGE:0:50000, rectUtilization:GAUGE:0:50000

ELTEK_RECTIFIER       rectStatOutVolt:GAUGE:0:50000, rectStatTemp:GAUGE:0:50000, rectStatStatus:GAUGE:0:50000, rectStatOutCurr:GAUGE:0:50000

BarracudaSpamFW         systemLoad:GAUGE:0:100, domainCount:GAUGE:0:10000, cpuTemperature:GAUGE:0:200, avgEmailLatency:GAUGE:0:20000, inQSize:GAUGE:0:200000, outQSize:GAUGE:0:200000, deferredQSize:GAUGE:0:200000, notifyQSize:GAUGE:0:200000, totInBlocked:COUNTER:0:100000, totInVirusBlk:COUNTER:0:100000, totInRateCtrl:COUNTER:0:100000, totInQuarantined:COUNTER:0:100000, totInTagged:COUNTER:0:100000, totOutPolicyBlk:COUNTER:0:100000, totOutSpamBlk:COUNTER:0:100000, totOutVirusBlk:COUNTER:0:100000, totOutRateCtrl:COUNTER:0:100000, totOutQuarantined:COUNTER:0:100000, totAllowed:COUNTER:0:100000, totEncrypted:COUNTER:0:100000, totRedirected:COUNTER:0:100000, totSent:COUNTER:0:100000

ipsla   rttAdmNumDistBkt:GAUGE:0:200, rttAdmDistInt:GAUGE:0:200, rttTotalsInit:COUNTER:0:U, rttCollectDrops:COUNTER:0:U, rttCollectTimeouts:COUNTER:0:U, rttCptComplTimeMn:GAUGE:0:100000, rttCptComplTimeMx:GAUGE:0:100000, rttCptSumCmpTm2Hi:COUNTER:0:U, rttCptSumCmpTm2Lo:COUNTER:0:U, rttCptSumCmpTm:COUNTER:0:U, rttCptOverThres:COUNTER:0:U

ipslaminimal   rttCptCompletions:COUNTER:0:U

ipsla2  rttCollectTimeouts:COUNTER:0:U

Metadata

Metadata

Assignees

Labels

Bug: A genuine bug

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions

    0