# -*- coding: utf-8 -*-
# Description: smart netdata python.d module
# Author: ilyam8, vorph1
# SPDX-License-Identifier: GPL-3.0-or-later

import os
import re
from copy import deepcopy
from time import time

from bases.FrameworkServices.SimpleService import SimpleService
from bases.collection import read_last_line

# Dimension algorithms referenced by the 'algo' entry of each chart definition.
INCREMENTAL = 'incremental'
ABSOLUTE = 'absolute'

# Substrings looked for in a log file name to choose the disk type, and the
# only file extension that is considered during a scan.
ATA = 'ata'
SCSI = 'scsi'
CSV = '.csv'

# The log directory is rescanned every DEF_RESCAN_INTERVAL data collections.
DEF_RESCAN_INTERVAL = 60
# Default maximum age (in minutes) of a log file that is still collected.
DEF_AGE = 30
# Default directory that is scanned for log files.
DEF_PATH = '/var/log/smartd'

# Attribute names as they are matched in the log lines: all-digit names are
# routed to the ATA attribute classes, the textual ones to the SCSI class.
ATTR1 = '1'
ATTR2 = '2'
ATTR3 = '3'
ATTR4 = '4'
ATTR5 = '5'
ATTR7 = '7'
ATTR8 = '8'
ATTR9 = '9'
ATTR10 = '10'
ATTR11 = '11'
ATTR12 = '12'
ATTR13 = '13'
ATTR170 = '170'
ATTR171 = '171'
ATTR172 = '172'
ATTR173 = '173'
ATTR174 = '174'
ATTR180 = '180'
ATTR183 = '183'
ATTR190 = '190'
ATTR194 = '194'
ATTR196 = '196'
ATTR197 = '197'
ATTR198 = '198'
ATTR199 = '199'
ATTR202 = '202'
ATTR206 = '206'
ATTR233 = '233'
ATTR249 = '249'
ATTR_READ_ERR_COR = 'read-total-err-corrected'
ATTR_READ_ERR_UNC = 'read-total-unc-errors'
ATTR_WRITE_ERR_COR = 'write-total-err-corrected'
ATTR_WRITE_ERR_UNC = 'write-total-unc-errors'
ATTR_VERIFY_ERR_COR = 'verify-total-err-corrected'
ATTR_VERIFY_ERR_UNC = 'verify-total-unc-errors'
ATTR_TEMPERATURE = 'temperature'

# Matches one ATA record of a log line: three ';'-separated integers.
# Raw string literals are used so that '\d' reaches the regex engine as written
# instead of relying on Python tolerating an unknown string escape.
RE_ATA = re.compile(
    r'(\d+);'  # attribute
    r'(\d+);'  # normalized value
    r'(\d+)',  # raw value
    re.X
)

# Matches one SCSI record of a log line: a lower-case dashed name, ';', then a
# number made of digits and dots.
RE_SCSI = re.compile(
    r'([a-z-]+);'  # attribute
    r'([0-9.]+)',  # raw value
    re.X
)

# Chart ids in display order, grouped by family.
# Every chart id that has a definition must be listed here as well; a chart
# that is defined but not listed is presumably never created by the framework
# (NOTE(review): confirm against SimpleService), so its attributes would be
# silently skipped when dimensions are added.
ORDER = [
    # errors
    'read_error_rate',
    'seek_error_rate',
    'soft_read_error_rate',
    'write_error_rate',
    'read_total_err_corrected',
    'read_total_unc_errors',
    'write_total_err_corrected',
    'write_total_unc_errors',
    'verify_total_err_corrected',
    'verify_total_unc_errors',
    # external failure
    'sata_interface_downshift',
    'udma_crc_error_count',
    # performance
    'throughput_performance',
    'seek_time_performance',
    # power
    'start_stop_count',
    'power_on_hours_count',
    'power_cycle_count',
    'unexpected_power_loss',
    # spin
    'spin_up_time',
    'spin_up_retries',
    'calibration_retries',
    # temperature
    'airflow_temperature_celsius',
    'temperature_celsius',
    # wear
    'reallocated_sectors_count',
    'reserved_block_count',
    'program_fail_count',
    'erase_fail_count',
    'wear_leveller_worst_case_erase_count',
    'unused_reserved_nand_blocks',
    'reallocation_event_count',
    'current_pending_sector_count',
    'offline_uncorrectable_sector_count',
    'percent_lifetime_used',
    'media_wearout_indicator',
    'nand_writes_1gib',
]

# Chart definitions keyed by chart id.
# Besides the 'options' and 'lines' entries, every definition carries two keys
# that are consumed inside this module:
#   'attrs' - names of the attributes that are drawn on the chart,
#   'algo'  - algorithm given to every dimension added to the chart.
# 'lines' starts empty; dimensions are added per disk at run time.
CHARTS = {
    'read_error_rate': {
        'options': [None, 'Read Error Rate', 'value', 'errors', 'smartd_log.read_error_rate', 'line'],
        'lines': [],
        'attrs': [ATTR1],
        'algo': ABSOLUTE,
    },
    'seek_error_rate': {
        'options': [None, 'Seek Error Rate', 'value', 'errors', 'smartd_log.seek_error_rate', 'line'],
        'lines': [],
        'attrs': [ATTR7],
        'algo': ABSOLUTE,
    },
    'soft_read_error_rate': {
        'options': [None, 'Soft Read Error Rate', 'errors', 'errors', 'smartd_log.soft_read_error_rate', 'line'],
        'lines': [],
        'attrs': [ATTR13],
        'algo': INCREMENTAL,
    },
    'write_error_rate': {
        'options': [None, 'Write Error Rate', 'value', 'errors', 'smartd_log.write_error_rate', 'line'],
        'lines': [],
        'attrs': [ATTR206],
        'algo': ABSOLUTE,
    },
    'read_total_err_corrected': {
        'options': [None, 'Read Error Corrected', 'errors', 'errors', 'smartd_log.read_total_err_corrected', 'line'],
        'lines': [],
        'attrs': [ATTR_READ_ERR_COR],
        'algo': INCREMENTAL,
    },
    'read_total_unc_errors': {
        'options': [None, 'Read Error Uncorrected', 'errors', 'errors', 'smartd_log.read_total_unc_errors', 'line'],
        'lines': [],
        'attrs': [ATTR_READ_ERR_UNC],
        'algo': INCREMENTAL,
    },
    'write_total_err_corrected': {
        'options': [None, 'Write Error Corrected', 'errors', 'errors', 'smartd_log.write_total_err_corrected', 'line'],
        'lines': [],
        'attrs': [ATTR_WRITE_ERR_COR],
        'algo': INCREMENTAL,
    },
    'write_total_unc_errors': {
        'options': [None, 'Write Error Uncorrected', 'errors', 'errors', 'smartd_log.write_total_unc_errors', 'line'],
        'lines': [],
        'attrs': [ATTR_WRITE_ERR_UNC],
        'algo': INCREMENTAL,
    },
    'verify_total_err_corrected': {
        'options': [None, 'Verify Error Corrected', 'errors', 'errors', 'smartd_log.verify_total_err_corrected',
                    'line'],
        'lines': [],
        'attrs': [ATTR_VERIFY_ERR_COR],
        'algo': INCREMENTAL,
    },
    'verify_total_unc_errors': {
        'options': [None, 'Verify Error Uncorrected', 'errors', 'errors', 'smartd_log.verify_total_unc_errors', 'line'],
        'lines': [],
        'attrs': [ATTR_VERIFY_ERR_UNC],
        'algo': INCREMENTAL,
    },
    'sata_interface_downshift': {
        'options': [None, 'SATA Interface Downshift', 'events', 'external failure',
                    'smartd_log.sata_interface_downshift', 'line'],
        'lines': [],
        'attrs': [ATTR183],
        'algo': INCREMENTAL,
    },
    'udma_crc_error_count': {
        'options': [None, 'UDMA CRC Error Count', 'errors', 'external failure', 'smartd_log.udma_crc_error_count',
                    'line'],
        'lines': [],
        'attrs': [ATTR199],
        'algo': INCREMENTAL,
    },
    'throughput_performance': {
        'options': [None, 'Throughput Performance', 'value', 'performance', 'smartd_log.throughput_performance',
                    'line'],
        'lines': [],
        'attrs': [ATTR2],
        'algo': ABSOLUTE,
    },
    'seek_time_performance': {
        'options': [None, 'Seek Time Performance', 'value', 'performance', 'smartd_log.seek_time_performance', 'line'],
        'lines': [],
        'attrs': [ATTR8],
        'algo': ABSOLUTE,
    },
    'start_stop_count': {
        'options': [None, 'Start/Stop Count', 'events', 'power', 'smartd_log.start_stop_count', 'line'],
        'lines': [],
        'attrs': [ATTR4],
        'algo': ABSOLUTE,
    },
    'power_on_hours_count': {
        'options': [None, 'Power-On Hours Count', 'hours', 'power', 'smartd_log.power_on_hours_count', 'line'],
        'lines': [],
        'attrs': [ATTR9],
        'algo': ABSOLUTE,
    },
    'power_cycle_count': {
        'options': [None, 'Power Cycle Count', 'events', 'power', 'smartd_log.power_cycle_count', 'line'],
        'lines': [],
        'attrs': [ATTR12],
        'algo': ABSOLUTE,
    },
    'unexpected_power_loss': {
        'options': [None, 'Unexpected Power Loss', 'events', 'power', 'smartd_log.unexpected_power_loss', 'line'],
        'lines': [],
        'attrs': [ATTR174],
        'algo': ABSOLUTE,
    },
    'spin_up_time': {
        'options': [None, 'Spin-Up Time', 'ms', 'spin', 'smartd_log.spin_up_time', 'line'],
        'lines': [],
        'attrs': [ATTR3],
        'algo': ABSOLUTE,
    },
    'spin_up_retries': {
        'options': [None, 'Spin-up Retries', 'retries', 'spin', 'smartd_log.spin_up_retries', 'line'],
        'lines': [],
        'attrs': [ATTR10],
        'algo': INCREMENTAL,
    },
    'calibration_retries': {
        'options': [None, 'Calibration Retries', 'retries', 'spin', 'smartd_log.calibration_retries', 'line'],
        'lines': [],
        'attrs': [ATTR11],
        'algo': INCREMENTAL,
    },
    'airflow_temperature_celsius': {
        'options': [None, 'Airflow Temperature Celsius', 'celsius', 'temperature',
                    'smartd_log.airflow_temperature_celsius', 'line'],
        'lines': [],
        'attrs': [ATTR190],
        'algo': ABSOLUTE,
    },
    'temperature_celsius': {
        'options': [None, 'Temperature', 'celsius', 'temperature', 'smartd_log.temperature_celsius', 'line'],
        'lines': [],
        'attrs': [ATTR194, ATTR_TEMPERATURE],
        'algo': ABSOLUTE,
    },
    'reallocated_sectors_count': {
        'options': [None, 'Reallocated Sectors Count', 'sectors', 'wear', 'smartd_log.reallocated_sectors_count',
                    'line'],
        'lines': [],
        'attrs': [ATTR5],
        'algo': ABSOLUTE,
    },
    'reserved_block_count': {
        'options': [None, 'Reserved Block Count', 'percentage', 'wear', 'smartd_log.reserved_block_count', 'line'],
        'lines': [],
        'attrs': [ATTR170],
        'algo': ABSOLUTE,
    },
    'program_fail_count': {
        'options': [None, 'Program Fail Count', 'errors', 'wear', 'smartd_log.program_fail_count', 'line'],
        'lines': [],
        'attrs': [ATTR171],
        'algo': INCREMENTAL,
    },
    'erase_fail_count': {
        'options': [None, 'Erase Fail Count', 'failures', 'wear', 'smartd_log.erase_fail_count', 'line'],
        'lines': [],
        'attrs': [ATTR172],
        'algo': INCREMENTAL,
    },
    'wear_leveller_worst_case_erase_count': {
        'options': [None, 'Wear Leveller Worst Case Erase Count', 'erases', 'wear',
                    'smartd_log.wear_leveller_worst_case_erase_count', 'line'],
        'lines': [],
        'attrs': [ATTR173],
        'algo': ABSOLUTE,
    },
    'unused_reserved_nand_blocks': {
        'options': [None, 'Unused Reserved NAND Blocks', 'blocks', 'wear', 'smartd_log.unused_reserved_nand_blocks',
                    'line'],
        'lines': [],
        'attrs': [ATTR180],
        'algo': ABSOLUTE,
    },
    'reallocation_event_count': {
        'options': [None, 'Reallocation Event Count', 'events', 'wear', 'smartd_log.reallocation_event_count', 'line'],
        'lines': [],
        'attrs': [ATTR196],
        'algo': INCREMENTAL,
    },
    'current_pending_sector_count': {
        'options': [None, 'Current Pending Sector Count', 'sectors', 'wear', 'smartd_log.current_pending_sector_count',
                    'line'],
        'lines': [],
        'attrs': [ATTR197],
        'algo': ABSOLUTE,
    },
    'offline_uncorrectable_sector_count': {
        'options': [None, 'Offline Uncorrectable Sector Count', 'sectors', 'wear',
                    'smartd_log.offline_uncorrectable_sector_count', 'line'],
        'lines': [],
        'attrs': [ATTR198],
        'algo': ABSOLUTE,

    },
    'percent_lifetime_used': {
        'options': [None, 'Percent Lifetime Used', 'percentage', 'wear', 'smartd_log.percent_lifetime_used', 'line'],
        'lines': [],
        'attrs': [ATTR202],
        'algo': ABSOLUTE,
    },
    'media_wearout_indicator': {
        'options': [None, 'Media Wearout Indicator', 'percentage', 'wear', 'smartd_log.media_wearout_indicator', 'line'],
        'lines': [],
        'attrs': [ATTR233],
        'algo': ABSOLUTE,
    },
    'nand_writes_1gib': {
        'options': [None, 'NAND Writes', 'GiB', 'wear', 'smartd_log.nand_writes_1gib', 'line'],
        'lines': [],
        'attrs': [ATTR249],
        'algo': ABSOLUTE,
    },
}

# NOTE: 'parse_temp' decodes the ATA 194 raw value. Not heavily tested. Written by @Ferroin
# C code:
# https://github.com/smartmontools/smartmontools/blob/master/smartmontools/atacmds.cpp#L2051
#
# Calling 'parse_temp' on the raw value will return a 4-tuple, containing
#  * temperature
#  * minimum
#  * maximum
#  * over-temperature count
# substituting None for values it can't decode.
#
# Example:
# >>> parse_temp(42952491042)
# (34, 10, 43, None)
#
#
# def check_temp_word(i):
#     if i <= 0x7F:
#         return 0x11
#     elif i <= 0xFF:
#         return 0x01
#     elif 0xFF80 <= i:
#         return 0x10
#     return 0x00
#
#
# def check_temp_range(t, b0, b1):
#     if b0 > b1:
#         t0, t1 = b1, b0
#     else:
#         t0, t1 = b0, b1
#
#     if all([
#         -60 <= t0,
#         t0 <= t,
#         t <= t1,
#         t1 <= 120,
#         not (t0 == -1 and t1 <= 0)
#     ]):
#         return t0, t1
#     return None, None
#
#
# def parse_temp(raw):
#     byte = list()
#     word = list()
#     for i in range(0, 6):
#         byte.append(0xFF & (raw >> (i * 8)))
#     for i in range(0, 3):
#         word.append(0xFFFF & (raw >> (i * 16)))
#
#     ctwd = check_temp_word(word[0])
#
#     if not word[2]:
#         if ctwd and not word[1]:
#             # byte[0] is temp, no other data
#             return byte[0], None, None, None
#
#         if ctwd and all(check_temp_range(byte[0], byte[2], byte[3])):
#             # byte[0] is temp, byte[2] is max or min, byte[3] is min or max
#             trange = check_temp_range(byte[0], byte[2], byte[3])
#             return byte[0], trange[0], trange[1], None
#
#         if ctwd and all(check_temp_range(byte[0], byte[1], byte[2])):
#             # byte[0] is temp, byte[1] is max or min, byte[2] is min or max
#             trange = check_temp_range(byte[0], byte[1], byte[2])
#             return byte[0], trange[0], trange[1], None
#
#         return None, None, None, None
#
#     if ctwd:
#         if all(
#                 [
#                     ctwd & check_temp_word(word[1]) & check_temp_word(word[2]) != 0x00,
#                     all(check_temp_range(byte[0], byte[2], byte[4])),
#                 ]
#         ):
#             # byte[0] is temp, byte[2] is max or min, byte[4] is min or max
#             trange = check_temp_range(byte[0], byte[2], byte[4])
#             return byte[0], trange[0], trange[1], None
#         else:
#             trange = check_temp_range(byte[0], byte[2], byte[3])
#             if word[2] < 0x7FFF and all(trange) and trange[1] >= 40:
#                 # byte[0] is temp, byte[2] is max or min, byte[3] is min or max, word[2] is overtemp count
#                 return byte[0], trange[0], trange[1], word[2]
#     # no data
#     return None, None, None, None


def _index_attrs_by_chart(charts):
    """Return a dict mapping every attribute name to the id of its chart."""
    index = dict()
    for chart_id, definition in charts.items():
        for attr_name in definition['attrs']:
            # an attribute listed by several charts keeps the last chart seen
            index[attr_name] = chart_id
    return index


# Reverse lookup: attribute name -> id of the chart that draws it.
CHARTED_ATTRS = _index_attrs_by_chart(CHARTS)


class BaseAtaSmartAttribute:
    """Holds the three fields of one ATA attribute record.

    Subclasses choose, in ``value``, what is reported for the attribute.
    """

    def __init__(self, name, normalized_value, raw_value):
        # the fields are stored exactly as they were passed in
        self.name, self.normalized_value, self.raw_value = name, normalized_value, raw_value

    def value(self):
        """Return the value to report; concrete subclasses must override this."""
        raise NotImplementedError()


class AtaRaw(BaseAtaSmartAttribute):
    """ATA attribute that is reported by its raw value."""

    def value(self):
        """Return the stored raw value unchanged (no conversion is applied)."""
        return self.raw_value


class AtaNormalized(BaseAtaSmartAttribute):
    """ATA attribute that is reported by its normalized value."""

    def value(self):
        """Return the stored normalized value unchanged (no conversion is applied)."""
        return self.normalized_value


class Ata3(BaseAtaSmartAttribute):
    """ATA attribute 3 (drawn on the 'Spin-Up Time' chart)."""

    def value(self):
        """Return the raw value as an int, unpacking it when it is implausibly large.

        Raw values above 1e6 are treated as packed and only their low 12 bits
        are returned; smaller values are returned as they are.
        """
        value = int(self.raw_value)
        # https://github.com/netdata/netdata/issues/5919
        #
        # 3;151;38684000679;
        # 423 (Average 447)
        # 38684000679 & 0xFFF -> 423
        # (38684000679 & 0xFFF0000) >> 16 -> 447
        if value > 1e6:
            return value & 0xFFF
        return value


class Ata9(BaseAtaSmartAttribute):
    """ATA attribute 9 (drawn on the 'Power-On Hours Count' chart)."""

    def value(self):
        """Return the raw value as an int, unpacking it when it is implausibly large.

        Raw values above 1e6 are treated as packed and only their low 16 bits
        are returned; smaller values are returned as they are.
        """
        value = int(self.raw_value)
        if value > 1e6:
            return value & 0xFFFF
        return value


class Ata190(BaseAtaSmartAttribute):
    """ATA attribute 190 (drawn on the 'Airflow Temperature Celsius' chart)."""

    def value(self):
        """Return 100 minus the normalized value."""
        normalized = int(self.normalized_value)
        return 100 - normalized


class Ata194(BaseAtaSmartAttribute):
    """ATA attribute 194 (drawn on the 'Temperature' chart)."""

    # https://github.com/netdata/netdata/issues/3041
    # https://github.com/netdata/netdata/issues/5919
    #
    # The low byte is the current temperature, the third lowest is the maximum, and the fifth lowest is the minimum
    def value(self):
        """Return the current temperature.

        Raw values above 1e6 are treated as packed and only their low byte is
        returned; otherwise the smaller of the normalized and raw values is
        returned.
        """
        value = int(self.raw_value)
        if value > 1e6:
            return value & 0xFF
        return min(int(self.normalized_value), int(self.raw_value))


class BaseSCSISmartAttribute:
    """Holds the two fields of one SCSI attribute record.

    Subclasses choose, in ``value``, what is reported for the attribute.
    """

    def __init__(self, name, raw_value):
        # the fields are stored exactly as they were passed in
        self.name, self.raw_value = name, raw_value

    def value(self):
        """Return the value to report; concrete subclasses must override this."""
        raise NotImplementedError()


class SCSIRaw(BaseSCSISmartAttribute):
    """SCSI attribute that is reported by its raw value."""

    def value(self):
        """Return the stored raw value unchanged (no conversion is applied)."""
        return self.raw_value


def ata_attribute_factory(value):
    """Wrap one parsed ATA record in the attribute class that fits its name.

    :param value: sequence whose first item is the attribute name; all items
                  are passed positionally to the chosen class.
    :return: Ata3/Ata9/Ata190/Ata194 for those attributes, AtaNormalized for
             attributes 1, 7, 202, 206 and 233, AtaRaw for everything else.
    """
    attr_name = value[0]

    # attributes whose value needs dedicated decoding
    dedicated = {
        ATTR3: Ata3,
        ATTR9: Ata9,
        ATTR190: Ata190,
        ATTR194: Ata194,
    }
    attr_class = dedicated.get(attr_name)
    if attr_class is not None:
        return attr_class(*value)

    # attributes that are reported by their normalized value
    if attr_name in (ATTR1, ATTR7, ATTR202, ATTR206, ATTR233):
        return AtaNormalized(*value)

    return AtaRaw(*value)


def scsi_attribute_factory(value):
    """Wrap one parsed SCSI record (its items are passed positionally) in SCSIRaw."""
    return SCSIRaw(*value)


def attribute_factory(value):
    """Build the attribute object for one parsed record.

    Records whose first item consists of digits only are handled as ATA
    attributes, all the others as SCSI attributes.
    """
    build = ata_attribute_factory if value[0].isdigit() else scsi_attribute_factory
    return build(value)


def handle_error(*errors):
    """Build a decorator that turns the given exceptions into a ``None`` result.

    :param errors: exception classes to swallow; any other exception raised by
                   the decorated callable still propagates.
    :return: a decorator; the wrapped callable returns whatever the original
             returns, or None when one of ``errors`` was raised.
    """
    # imported here so the helper does not depend on the module import block
    from functools import wraps

    def on_method(method):
        # keep the name and docstring of the decorated callable
        @wraps(method)
        def on_call(*args, **kwargs):
            try:
                return method(*args, **kwargs)
            except errors:
                return None

        return on_call

    return on_method


class DiskLogFile:
    """A log file on disk together with the size it had when it was last read.

    The three query methods are wrapped with ``handle_error(OSError)``, so each
    of them returns None instead of raising when the file cannot be accessed.
    """

    def __init__(self, full_path):
        self.path = full_path
        # not wrapped by handle_error: an OSError raised here reaches the caller
        self.size = os.path.getsize(full_path)

    @handle_error(OSError)
    def is_changed(self):
        """Return True when the current file size differs from the remembered one."""
        return self.size != os.path.getsize(self.path)

    @handle_error(OSError)
    def is_active(self, current_time, limit):
        """Return True when the file was modified less than ``limit`` minutes ago.

        :param current_time: reference timestamp in seconds.
        :param limit: maximum allowed age in minutes.
        """
        return (current_time - os.path.getmtime(self.path)) / 60 < limit

    @handle_error(OSError)
    def read(self):
        """Remember the current file size and return the last line of the file."""
        self.size = os.path.getsize(self.path)
        return read_last_line(self.path)


class BaseDisk:
    """A monitored disk: its name, its log file and the attributes last parsed.

    Disks compare equal by ``raw_name``; a disk also compares equal to a plain
    value equal to its ``raw_name``, which allows lookups and removals by name
    in a list of disks.
    """

    def __init__(self, name, log_file):
        self.raw_name = name
        # runs of underscores are collapsed for the name used in dimension ids
        self.name = re.sub(r'_+', '_', name)
        self.log_file = log_file
        self.attrs = list()
        self.alive = True
        self.charted = False

    def __eq__(self, other):
        if isinstance(other, BaseDisk):
            return self.raw_name == other.raw_name
        return self.raw_name == other

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        # Hash the same key that __eq__ compares, so that equal disks (and the
        # equal plain name) hash alike. The default repr() is different for
        # every instance, which made equal disks hash differently.
        return hash(self.raw_name)

    def parser(self, data):
        """Split a log line into attribute records; provided by subclasses."""
        raise NotImplementedError

    @handle_error(TypeError)
    def populate_attrs(self):
        """Re-read the log file and rebuild ``attrs`` from its last line.

        :return: number of parsed attributes, or None when a TypeError was
                 raised on the way (for instance when the read yielded None
                 instead of a line).
        """
        self.attrs = list()
        line = self.log_file.read()
        for value in self.parser(line):
            self.attrs.append(attribute_factory(value))

        return len(self.attrs)

    def data(self):
        """Return a dict of '<disk name>_<attribute name>' -> attribute value."""
        data = dict()
        for attr in self.attrs:
            data['{0}_{1}'.format(self.name, attr.name)] = attr.value()
        return data


class ATADisk(BaseDisk):
    """Disk whose log lines are parsed with the ATA record pattern."""

    def parser(self, data):
        """Return all RE_ATA matches found in ``data`` as a list of tuples."""
        return RE_ATA.findall(data)


class SCSIDisk(BaseDisk):
    """Disk whose log lines are parsed with the SCSI record pattern."""

    def parser(self, data):
        """Return all RE_SCSI matches found in ``data`` as a list of tuples."""
        return RE_SCSI.findall(data)


class Service(SimpleService):
    """Collects disk attributes from the CSV log files found in ``log_path``.

    Configuration keys read here:
      * log_path      - directory to scan (default DEF_PATH)
      * age           - maximum age of a log file in minutes (default DEF_AGE)
      * exclude_disks - whitespace-separated substrings; a disk whose name
                        contains any of them is skipped
    """

    def __init__(self, configuration=None, name=None):
        SimpleService.__init__(self, configuration=configuration, name=name)
        # 'configuration' defaults to None: use an empty mapping for the lookups
        conf = configuration or dict()
        self.order = ORDER
        self.definitions = deepcopy(CHARTS)
        self.log_path = conf.get('log_path', DEF_PATH)
        self.age = conf.get('age', DEF_AGE)
        self.exclude = conf.get('exclude_disks', str()).split()
        self.disks = list()
        self.runs = 0
        self.do_force_rescan = False

    def check(self):
        """Return True when the initial scan found at least one usable disk."""
        return self.scan() > 0

    def get_data(self):
        """Return the current values of all alive disks as one flat dict.

        Every DEF_RESCAN_INTERVAL calls, or right after a disk failed, stale
        disks are dropped and the log directory is scanned again.
        """
        self.runs += 1

        if self.do_force_rescan or self.runs % DEF_RESCAN_INTERVAL == 0:
            self.cleanup()
            self.scan()
            self.do_force_rescan = False

        data = dict()

        for disk in self.disks:
            if not disk.alive:
                continue

            if not disk.charted:
                self.add_disk_to_charts(disk)

            changed = disk.log_file.is_changed()

            # None means the log file could not be accessed
            if changed is None:
                disk.alive = False
                self.do_force_rescan = True
                continue

            # attributes are re-parsed only when the file size changed
            if changed and disk.populate_attrs() is None:
                disk.alive = False
                self.do_force_rescan = True
                continue

            data.update(disk.data())

        return data

    def cleanup(self):
        """Drop disks that are dead or whose log file is older than ``age``."""
        current_time = time()
        # iterate over a copy: disks are removed from the list inside the loop
        for disk in self.disks[:]:
            if disk.alive and disk.log_file.is_active(current_time, self.age):
                continue
            self.disks.remove(disk.raw_name)
            self.remove_disk_from_charts(disk)

    def scan(self):
        """Add a disk for every new usable log file and return the disk count.

        A directory that cannot be listed, or a file that cannot be inspected,
        is logged and skipped instead of raising.
        """
        self.debug('scanning {0}'.format(self.log_path))
        current_time = time()

        try:
            file_names = os.listdir(self.log_path)
        except OSError as error:
            self.error('cannot list {0}: {1}'.format(self.log_path, error))
            return len(self.disks)

        for full_name in file_names:
            try:
                disk = self.create_disk_from_file(full_name, current_time)
            except OSError as error:
                # e.g. the file disappeared between the listing and the checks
                self.debug('skipping {0}: {1}'.format(full_name, error))
                continue
            if not disk:
                continue
            self.disks.append(disk)

        return len(self.disks)

    def create_disk_from_file(self, full_name, current_time):
        """Return a disk object for the log file, or None when it is skipped.

        The disk name is the third dot-separated part of the file name counted
        from the end, i.e. '<...>.<name>.<type>.csv'.

        :param full_name: file name inside ``log_path``.
        :param current_time: reference timestamp (seconds) for the age check.
        """
        if not full_name.endswith(CSV):
            self.debug('skipping {0}: not a csv file'.format(full_name))
            return None

        parts = os.path.basename(full_name).split('.')
        # a name such as 'foo.csv' has no disk part to take the name from
        if len(parts) < 3:
            self.debug('skipping {0}: unexpected file name format'.format(full_name))
            return None

        name = parts[-3]
        path = os.path.join(self.log_path, full_name)

        if name in self.disks:
            self.debug('skipping {0}: already in disks'.format(full_name))
            return None

        if any(pattern in name for pattern in self.exclude):
            self.debug('skipping {0}: filtered by `exclude` option'.format(full_name))
            return None

        if not os.access(path, os.R_OK):
            self.debug('skipping {0}: not readable'.format(full_name))
            return None

        if os.path.getsize(path) == 0:
            self.debug('skipping {0}: zero size'.format(full_name))
            return None

        if (current_time - os.path.getmtime(path)) / 60 > self.age:
            self.debug('skipping {0}: haven\'t been updated for last {1} minutes'.format(full_name, self.age))
            return None

        # the disk type is chosen by a substring of the file name
        if ATA in full_name:
            disk = ATADisk(name, DiskLogFile(path))
        elif SCSI in full_name:
            disk = SCSIDisk(name, DiskLogFile(path))
        else:
            self.debug('skipping {0}: unknown type'.format(full_name))
            return None

        disk.populate_attrs()
        if not disk.attrs:
            self.error('skipping {0}: parsing failed'.format(full_name))
            return None

        self.debug('added {0}'.format(full_name))
        return disk

    def add_disk_to_charts(self, disk):
        """Add (or un-hide) one dimension per charted attribute of the disk."""
        if len(self.charts) == 0 or disk.charted:
            return
        disk.charted = True

        for attr in disk.attrs:
            chart_id = CHARTED_ATTRS.get(attr.name)

            if not chart_id or chart_id not in self.charts:
                continue

            chart = self.charts[chart_id]
            # dimension id, dimension name, algorithm
            dim = [
                '{0}_{1}'.format(disk.name, attr.name),
                disk.name,
                CHARTS[chart_id]['algo'],
            ]

            if dim[0] in self.charts[chart_id].dimensions:
                chart.hide_dimension(dim[0], reverse=True)
            else:
                chart.add_dimension(dim)

    def remove_disk_from_charts(self, disk):
        """Delete the dimensions of the disk from every chart it was drawn on."""
        if len(self.charts) == 0 or not disk.charted:
            return

        for attr in disk.attrs:
            chart_id = CHARTED_ATTRS.get(attr.name)

            if not chart_id or chart_id not in self.charts:
                continue

            self.charts[chart_id].del_dimension('{0}_{1}'.format(disk.name, attr.name))