Description
I have a problem where the collectd snmp plugin seems to leak about 50MB of memory per hour on our setup.
valgrind output: https://gist.github.com/StianOvrevage/9f0a48519a1f7f49c685
valgrind output (full, with show-reachable): https://gist.github.com/StianOvrevage/e84245d2aea1f723ce69
valgrind massif output: https://gist.github.com/StianOvrevage/62b28d7f007630367465
There is a bug report in net-snmp, related to the snmp_pdu_create function, which suggests that it is possible to use the SNMP library incorrectly and leak memory that way: http://sourceforge.net/p/net-snmp/bugs/1095/
collectd.conf:
Hostname "collectd-instance-01-kb-infrastructure"
PIDFile "/var/run/collectd-instance-01-kb-infrastructure.pid"
TypesDB "/opt/collectd/share/collectd/types.db" "/opt/collectd/etc/types.custom.db"
Interval 10
Timeout 2
ReadThreads 50
Include "/opt/collectd/etc/instance-01-kb-infrastructure/snmp.conf"
#WriteQueueLimitHigh 1000000
#WriteQueueLimitLow 800000
LoadPlugin logfile
<Plugin logfile>
LogLevel info
File "/var/log/collectd-instance-01-kb-infrastructure.log"
Timestamp true
PrintSeverity true
</Plugin>
#LoadPlugin write_http
#<Plugin "write_http">
# <URL "http://localhost:8888/collectd/data">
# Format "JSON"
#User "collectd"
#Password "weCh3ik0"
# </URL>
#</Plugin>
snmp.conf:
We have 100+ hosts in snmp.conf. I have only included a few here; the rest have mostly identical config.
LoadPlugin snmp
<Plugin snmp>
<Data "BarracudaSpamFW">
Type "BarracudaSpamFW"
Table false
Values "Barracuda-SPAM::systemLoad" "Barracuda-SPAM::domainCount" "Barracuda-SPAM::cpuTemperature" "Barracuda-SPAM::avgEmailLatency" "Barracuda-SPAM::inQueueSize" "Barracuda-SPAM::outQueueSize" "Barracuda-SPAM::deferredQueueSize" "Barracuda-SPAM::notifyQueueSize" "Barracuda-SPAM::totalInboundBlocked" "Barracuda-SPAM::totalInboundVirusBlocked" "Barracuda-SPAM::totalInboundRateControlled" "Barracuda-SPAM::totalInboundQuarantined" "Barracuda-SPAM::totalInboundTagged" "Barracuda-SPAM::totalOutboundPolicyBlocked" "Barracuda-SPAM::totalOutboundSpamBlocked" "Barracuda-SPAM::totalOutboundVirusBlocked" "Barracuda-SPAM::totalOutboundRateControlled" "Barracuda-SPAM::totalOutboundQuarantined" "Barracuda-SPAM::totalAllowed" "Barracuda-SPAM::totalEncrypted" "Barracuda-SPAM::totalRedirected" "Barracuda-SPAM::totalSent"
</Data>
<Data "ups_powerware">
Type "ups_powerware"
Table false
#Instance "IF-MIB::ifDescr"
Values "UPS-MIB::upsSecondsOnBattery.0" "UPS-MIB::upsEstimatedMinutesRemaining.0" "UPS-MIB::upsEstimatedChargeRemaining.0" "UPS-MIB::upsBatteryVoltage.0" "UPS-MIB::upsInputLineBads.0" "UPS-MIB::upsInputNumLines.0" "UPS-MIB::upsInputFrequency.1" "UPS-MIB::upsInputVoltage.1" "UPS-MIB::upsInputCurrent.1" "UPS-MIB::upsOutputFrequency.0" "UPS-MIB::upsOutputNumLines.0" "UPS-MIB::upsOutputVoltage.1" "UPS-MIB::upsOutputCurrent.1" "UPS-MIB::upsOutputPower.1" "UPS-MIB::upsOutputPercentLoad.1"
</Data>
<Data "routes">
Type "routes"
Table true
Values "IP-FORWARD-MIB::ipCidrRouteNumber" "IP-FORWARD-MIB::inetCidrRouteNumber" "IPMROUTE-STD-MIB::ipMRouteEntryCount"
</Data>
<Data "if_stats">
Type "if_stats"
Table true
Instance "IF-MIB::ifDescr"
Values "IF-MIB::ifHCInOctets" "IF-MIB::ifHCOutOctets" "IF-MIB::ifHCInUcastPkts" "IF-MIB::ifHCInMulticastPkts" "IF-MIB::ifHCInBroadcastPkts" "IF-MIB::ifHCOutUcastPkts" "IF-MIB::ifHCOutMulticastPkts" "IF-MIB::ifHCOutBroadcastPkts" "IF-MIB::ifInDiscards" "IF-MIB::ifInErrors" "IF-MIB::ifOutDiscards" "IF-MIB::ifOutErrors"
</Data>
<Data "sensors">
Type "sensors"
Table true
Instance "ENTITY-MIB::entPhysicalDescr"
Values "CISCO-ENTITY-SENSOR-MIB::entSensorValue" "CISCO-ENTITY-SENSOR-MIB::entSensorThresholdValue"
</Data>
<Data "cisco_cpu">
Type "cisco_cpu"
Table true
Values "CISCO-PROCESS-MIB::cpmCPUTotal5secRev" "CISCO-PROCESS-MIB::cpmCPUTotal1minRev" "CISCO-PROCESS-MIB::cpmCPUTotal5minRev"
</Data>
<Data "uptime">
Type "uptime"
Table false
#Instance ""
Values "DISMAN-EVENT-MIB::sysUpTimeInstance"
</Data>
<Host "kbspamfw01">
Address ""
Version 2
Community ""
Collect "BarracudaSpamFW"
Interval 20
</Host>
<Host "sw01">
Address ""
Version 2
Community ""
Collect "cisco_cpu" "uptime" "sensors" "if_stats"
Interval 20
</Host>
<Host "sw02">
Address ""
Version 2
Community ""
Collect "cisco_cpu" "uptime" "sensors" "if_stats" "routes"
Interval 20
</Host>
<Host "sw03">
Address ""
Version 2
Community ""
Collect "cisco_cpu" "uptime" "sensors" "if_stats"
Interval 20
</Host>
</Plugin>
types.custom.db:
if_stats ifHCInOctets:COUNTER:0:U, ifHCOutOctets:COUNTER:0:U, ifHCInUcastPkts:COUNTER:0:U, ifHCInMulticastPkts:COUNTER:0:U, ifHCInBroadcastPkts:COUNTER:0:U, ifHCOutUcastPkts:COUNTER:0:U, ifHCOutMulticastPkts:COUNTER:0:U, ifHCOutBroadcastPkts:COUNTER:0:U, ifInDiscards:COUNTER:0:U, ifInErrors:COUNTER:0:U, ifOutDiscards:COUNTER:0:U, ifOutErrors:COUNTER:0:U
if_octets_hc ifHCInOctets:COUNTER:0:U, ifHCOutOctets:COUNTER:0:U
if_packets_hc ifHCInUcastPkts:COUNTER:0:U, ifHCInMcastPkts:COUNTER:0:U, ifHCInBcastPkts:COUNTER:0:U, ifHCOutUcastPkts:COUNTER:0:U, ifHCOutMcastPkts:COUNTER:0:U, ifHCOutBcastPkts:COUNTER:0:U
if_drop_discard_err_que ifInDiscards:COUNTER:0:U, ifInErrors:COUNTER:0:U, ifOutDiscards:COUNTER:0:U, ifOutErrors:COUNTER:0:U
if_octets ifInOctets:COUNTER:0:U, ifOutOctets:COUNTER:0:U
if_rgpackets ifInUcastPkts:COUNTER:0:U, ifInNUcastPkts:COUNTER:0:U, ifOutUcastPkts:COUNTER:0:U, ifOutNUcastPkts:COUNTER:0:U
sensors sensorValue:GAUGE:U:U, sensorThreshold:GAUGE:U:U
uptime uptime:GAUGE:U:U
cisco_cpu cpu5sec:GAUGE:0:100, cpu1min:GAUGE:0:100, cpu5min:GAUGE:0:100
routes ipv4routes:GAUGE:0:U, ipv6routes:GAUGE:0:U, mcastroutes:GAUGE:0:U
ups_powerware SecondsOnBattery:GAUGE:0:U, EstMinuteRem:GAUGE:0:U, EstChargeRem:GAUGE:0:U, BatteryVoltage:GAUGE:0:U, InputLineBads:COUNTER:0:U, InputNumLines:GAUGE:0:U, InputFrequency:GAUGE:0:U, InputVoltage:GAUGE:0:U, InputCurrent:GAUGE:0:U, OutputFrequency:GAUGE:0:U, OutputNumLines:GAUGE:0:U, OutputVoltage:GAUGE:0:U, OutputCurrent:GAUGE:0:U, OutputPower:GAUGE:0:U, OutputPctLoad:GAUGE:0:U
ups_powerware_v2 Uptime:GAUGE:0:U, BatteryStatus:GAUGE:0:U, SecondsOnBattery:GAUGE:0:U, EstMinuteRem:GAUGE:0:U, EstChargeRem:GAUGE:0:U, BatteryVoltage:GAUGE:0:U, BatteryCurrent:GAUGE:0:U, InputLineBads:COUNTER:0:U, InputFrequency:GAUGE:0:U, InputVoltage:GAUGE:0:U, InputCurrent:GAUGE:0:U, OutputSource:GAUGE:0:U, OutputFrequency:GAUGE:0:U, OutputVoltage:GAUGE:0:U, OutputCurrent:GAUGE:0:U, OutputPower:GAUGE:0:U, OutputPctLoad:GAUGE:0:U, Alarms:GAUGE:0:U
ELTEK_48VPLANT loadDistCurrent:GAUGE:0:50000, acVoltage1:GAUGE:0:50000, acVoltage2:GAUGE:0:50000, acVoltage3:GAUGE:0:50000, batVoltage:GAUGE:0:50000, batCurrent:GAUGE:0:50000, batTemp:GAUGE:-50:150, batTimeToDiscon:GAUGE:0:44640, batCapLeft:GAUGE:0:44640, batCapUsed:GAUGE:0:44640, batCapTotal:GAUGE:0:44640, batQuality:GAUGE:0:100, batFloatVoltConf:GAUGE:0:50000, batBoostVoltConf:GAUGE:0:50000, batHiMajAlmVltCnf:GAUGE:0:50000, batHiMinAlmVltCnf:GAUGE:0:50000, batLoMajAlmVltCnf:GAUGE:0:50000, batLoMinAlmVltCnf:GAUGE:0:50000, rectTotalCurrent:GAUGE:0:50000, rectUtilization:GAUGE:0:50000
ELTEK_RECTIFIER rectStatOutVolt:GAUGE:0:50000, rectStatTemp:GAUGE:0:50000, rectStatStatus:GAUGE:0:50000, rectStatOutCurr:GAUGE:0:50000
BarracudaSpamFW systemLoad:GAUGE:0:100, domainCount:GAUGE:0:10000, cpuTemperature:GAUGE:0:200, avgEmailLatency:GAUGE:0:20000, inQSize:GAUGE:0:200000, outQSize:GAUGE:0:200000, deferredQSize:GAUGE:0:200000, notifyQSize:GAUGE:0:200000, totInBlocked:COUNTER:0:100000, totInVirusBlk:COUNTER:0:100000, totInRateCtrl:COUNTER:0:100000, totInQuarantined:COUNTER:0:100000, totInTagged:COUNTER:0:100000, totOutPolicyBlk:COUNTER:0:100000, totOutSpamBlk:COUNTER:0:100000, totOutVirusBlk:COUNTER:0:100000, totOutRateCtrl:COUNTER:0:100000, totOutQuarantined:COUNTER:0:100000, totAllowed:COUNTER:0:100000, totEncrypted:COUNTER:0:100000, totRedirected:COUNTER:0:100000, totSent:COUNTER:0:100000
ipsla rttAdmNumDistBkt:GAUGE:0:200, rttAdmDistInt:GAUGE:0:200, rttTotalsInit:COUNTER:0:U, rttCollectDrops:COUNTER:0:U, rttCollectTimeouts:COUNTER:0:U, rttCptComplTimeMn:GAUGE:0:100000, rttCptComplTimeMx:GAUGE:0:100000, rttCptSumCmpTm2Hi:COUNTER:0:U, rttCptSumCmpTm2Lo:COUNTER:0:U, rttCptSumCmpTm:COUNTER:0:U, rttCptOverThres:COUNTER:0:U
ipslaminimal rttCptCompletions:COUNTER:0:U
ipsla2 rttCollectTimeouts:COUNTER:0:U