Ceph OSD IO线程健康状态检查机制

目前看到H版本中，OSD IO线程的健康状态检查有3种心跳机制（对比看了下L版本，基本没变化）：
1. service线程：

CephContextServiceThread::entry() {
    while (1) {
      Mutex::Locker l(_lock);

      if (_cct->_conf->heartbeat_interval) {
        utime_t interval(_cct->_conf->heartbeat_interval, 0);
        _cond.WaitInterval(_cct, _lock, interval);
      } else
        _cond.Wait(_lock);
      ......
      _cct->_heartbeat_map->check_touch_file(); // 这里检查完只是修改一下心跳文件的时间戳，没有其他操作，目前我们心跳文件路径（heartbeat_file配置项）没有配置，所以这一步其实啥也不做
      _cct->refresh_perf_values(); // 这个是第4种隐藏机制，下面有说明
    }
    return NULL;
}

CephContextServiceThread::entry() {

while (1) {

Mutex::Locker l(_lock);

if (_cct->_conf->heartbeat_interval) {

utime_t interval(_cct->_conf->heartbeat_interval, 0);

_cond.WaitInterval(_cct, _lock, interval);

} else

_cond.Wait(_lock);

......

_cct->_heartbeat_map->check_touch_file(); // 这里检查完只是修改一下心跳文件的时间戳，没有其他操作，目前我们心跳文件路径（heartbeat_file配置项）没有配置，所以这一步其实啥也不做

_cct->refresh_perf_values(); // 这个是第4种隐藏机制，下面有说明

}

return NULL;

}

2. OSD tick：

void OSD::tick()
{
  ......
  if (is_waiting_for_healthy()) {// 先判断OSD状态是否为STATE_WAITING_FOR_HEALTHY，会在start_waiting_for_healthy()这里更改为这个状态
    if (_is_healthy()) {//这里不常走到，一般是在启动流程才走
      dout(1) << "healthy again, booting" << dendl;
      set_state(STATE_BOOTING);
      start_boot();
    }
  }
  ......
}

void OSD::tick()

{

......

if (is_waiting_for_healthy()) {// 先判断OSD状态是否为STATE_WAITING_FOR_HEALTHY，会在start_waiting_for_healthy()这里更改为这个状态

if (_is_healthy()) {//这里不常走到，一般是在启动流程才走

dout(1) << "healthy again, booting" << dendl;

set_state(STATE_BOOTING);

start_boot();

}

}

......

}

3. 其他OSD发过来的ping请求：

void OSD::handle_osd_ping(MOSDPing *m)
{
  ......
  switch (m->op) {

  case MOSDPing::PING:
  {
    ......
    if (!cct->get_heartbeat_map()->is_healthy()) {//检查IO线程健康状态
        dout(10) << "internal heartbeat not healthy, dropping ping request" << dendl;
        break; //如果不健康则不给ping请求方回包
    }
    // IO线程正常，回包给请求方
    Message *r = new MOSDPing(monc->get_fsid(),
                curmap->get_epoch(),
                MOSDPing::PING_REPLY,
                m->stamp);
    m->get_connection()->send_message(r);

void OSD::handle_osd_ping(MOSDPing *m)

{

......

switch (m->op) {

case MOSDPing::PING:

{

......

if (!cct->get_heartbeat_map()->is_healthy()) {//检查IO线程健康状态

dout(10) << "internal heartbeat not healthy, dropping ping request" << dendl;

break; //如果不健康则不给ping请求方回包

}

// IO线程正常，回包给请求方

Message *r = new MOSDPing(monc->get_fsid(),

curmap->get_epoch(),

MOSDPing::PING_REPLY,

m->stamp);

m->get_connection()->send_message(r);

在L版本中其实还有一种机制，基于perfcounter实现：在service线程里（与第一种机制的代码位置相同）会把总IO线程数量和不健康IO线程数量分别更新到perfcounter中的l_cct_total_workers、l_cct_unhealthy_workers两个计数器上。可惜OSD启动时没有enable这两个counter（需要使用CephContext::enable_perf_counter），搜索代码可以看到rgw和rbd_mirror两个模块enable了。如果有需要，我们可以自己在OSD启动过程中enable起来（调用g_ceph_context->enable_perf_counter()即可），之后就可以通过ceph daemon osd.0 perf dump来查看相关counter信息了。

心跳超时默认15s打告警日志，超过150s会导致OSD自杀。

2018-12-23 13:27:09.538268 7fca5cc91700  1 heartbeat_map is_healthy 'OSD::osd_op_tp thread 0x7fca0f5e9700' had timed out after 15
2018-12-23 13:27:09.538283 7fca5cc91700  1 heartbeat_map is_healthy 'OSD::osd_op_tp thread 0x7fca0f5e9700' had suicide timed out after 150

2018-12-23 13:27:09.538268 7fca5cc91700 1 heartbeat_map is_healthy 'OSD::osd_op_tp thread 0x7fca0f5e9700' had timed out after 15

2018-12-23 13:27:09.538283 7fca5cc91700 1 heartbeat_map is_healthy 'OSD::osd_op_tp thread 0x7fca0f5e9700' had suicide timed out after 150