问题症状
在虚拟机里部署OpenStack环境时遇到一个问题:openstack token issue可以正常生成token,但nova list等命令都报401错误;用curl发GET请求验证刚生成的token,同样返回401错误。
先说结论:最终发现是虚拟机时间不同步导致的,用ntpdate命令同步时间之后就恢复正常了。时间不同步的原因,应该是做快照过程中pause了虚拟机,导致其时钟停走、时间落后。由于落后了好几个小时,新生成token的issued_at反而不晚于已有revoke event的issued_before,于是刚生成的token就被判定为已吊销(详见下面的分析)。
分析过程
keystone日志看到如下告警:
1 |
2018-01-30 08:18:08.700 20219 WARNING keystone.middleware.auth [req-10eba7f9-49f0-4504-9f91-d508f71eef46 - - - - -] RBAC: Invalid token |
但明明是刚生成的token,怎么会不可用?
直接加断点调试,调到最后发现token invalid的原因在:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
def _search(self, revoke_map, names, token_data): """Search for revocation event by token_data Traverse the revocation events tree looking for event matching token data issued after the token. """ if not names: # The last (leaf) level is checked in a special way because we # verify issued_at field differently. try: ### 下面的判断返回了True return revoke_map['issued_before'] >= token_data['issued_at'] except KeyError: return False name, remaining_names = names[0], names[1:] for key in self._next_level_keys(name, token_data): subtree = revoke_map.get('%s=%s' % (name, key)) if subtree and self._search(subtree, remaining_names, token_data): return True # If we made it out of the loop then no element in revocation tree # corresponds to our token and it is good. return False |
头一次听说还要检查token的生成时间,以前看老版本keystone代码,貌似只有对expires_at过期时间的检查。
继续看为什么要检查生成时间,找到了revoke event:在更新用户、revoke token等操作过程中会生成revoke event记录,之后校验token时再根据这些记录做上述检查。event的注册代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
def _register_listeners(self):
    """Register the revoke callbacks with the notification system.

    Every ``[resource_type, callback]`` pair below is passed to
    ``notifications.register_event_callback`` together with the event
    (deleted / disabled / internal) it is listed under.
    """
    # Each entry pairs the payload keyword of interest with the callback
    # to run for it.
    on_deleted = [
        ['OS-TRUST:trust', self._trust_callback],
        ['OS-OAUTH1:consumer', self._consumer_callback],
        ['OS-OAUTH1:access_token', self._access_token_callback],
        ['role', self._role_callback],
        ['user', self._user_callback],
        ['project', self._project_callback],
        ['role_assignment', self._role_assignment_callback],
    ]
    on_disabled = [
        ['user', self._user_callback],
        ['project', self._project_callback],
        ['domain', self._domain_callback],
    ]
    on_internal = [
        [notifications.INVALIDATE_USER_TOKEN_PERSISTENCE,
         self._user_callback],
    ]

    # Keyed by the event being listened for.
    listeners = {
        notifications.ACTIONS.deleted: on_deleted,
        notifications.ACTIONS.disabled: on_disabled,
        notifications.ACTIONS.internal: on_internal,
    }

    for event, registrations in listeners.items():
        for resource_type, callback in registrations:
            notifications.register_event_callback(
                event, resource_type, callback)
以revoke token为例说明相关流程:
1 2 3 4 |
@controller.protected()
def revoke_token(self, context):
    """Revoke the token identified by ``subject_token_id`` in ``context``.

    :param context: request context; ``subject_token_id`` is read with
                    ``context.get``
    :returns: whatever ``token_provider_api.revoke_token`` returns
    """
    subject_token_id = context.get('subject_token_id')
    return self.token_provider_api.revoke_token(subject_token_id)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
def revoke_token(self, token_id, revoke_chain=False):
    """Revoke a token by recording the appropriate revocation event.

    The token is validated first. The event is then recorded by audit
    id, by audit chain id (when ``revoke_chain`` is set), or - when the
    needed audit value is missing - by user and expiration time.

    :param token_id: ID of the token to revoke
    :param revoke_chain: revoke by ``audit_chain_id`` instead of by
                         ``audit_id``
    """
    token_ref = token_model.KeystoneToken(
        token_id=token_id,
        token_data=self.validate_token(token_id))

    user_id = token_ref.user_id
    expires_at = token_ref.expires
    audit_id = token_ref.audit_id
    audit_chain_id = token_ref.audit_chain_id
    project_id = token_ref.project_id if token_ref.project_scoped else None
    domain_id = token_ref.domain_id if token_ref.domain_scoped else None

    # Without the audit value for the requested mode, fall back to
    # revoking by expiration.
    if revoke_chain:
        revoke_by_expires = audit_chain_id is None
        if revoke_by_expires:
            LOG.debug('Received token with no audit_chain_id.')
    else:
        revoke_by_expires = audit_id is None
        if revoke_by_expires:
            LOG.debug('Received token with no audit_id.')

    if revoke_by_expires:
        self.revoke_api.revoke_by_expiration(user_id, expires_at,
                                             project_id=project_id,
                                             domain_id=domain_id)
    elif revoke_chain:
        self.revoke_api.revoke_by_audit_chain_id(audit_chain_id,
                                                 project_id=project_id,
                                                 domain_id=domain_id)
    else:
        # NOTE: the revoke-token flow traced in this article takes this
        # branch.
        self.revoke_api.revoke_by_audit_id(audit_id)

    if CONF.token.revoke_by_id and self._needs_persistence:
        self._persistence.delete_token(token_id=token_id)
1 2 |
def revoke_by_audit_id(self, audit_id):
    """Record a revocation event built from ``audit_id``.

    :param audit_id: audit ID passed to ``revoke_model.RevokeEvent``
    """
    event = revoke_model.RevokeEvent(audit_id=audit_id)
    self.revoke(event)
1 2 3 4 5 |
def revoke(self, event):
    """Pass ``event`` to the revoke driver, then invalidate the tree.

    :param event: the revocation event handed to ``self.driver.revoke``
    """
    # The driver is chosen from driver_namespace = 'keystone.revoke',
    # CONF.revoke.driver and the entry_points in setup.cfg.
    self.driver.revoke(event)
    # NOTE(review): presumably drops the memoized revoke tree so the
    # next lookup sees the new event - confirm against _get_revoke_tree.
    self._get_revoke_tree.invalidate(self)
1 2 3 4 5 6 7 8 |
def revoke(self, event):
    """Store ``event`` as a ``RevocationEvent`` database record.

    The record is built from the ``revoke_model.REVOKE_KEYS`` attributes
    of ``event`` and added inside a write session, after which
    ``_prune_expired_events`` is called.

    :param event: object exposing every attribute in ``REVOKE_KEYS``
    """
    kwargs = {
        attr: getattr(event, attr) for attr in revoke_model.REVOKE_KEYS
    }
    record = RevocationEvent(**kwargs)
    with sql.session_for_write() as session:
        # Save the database record.
        session.add(record)
        self._prune_expired_events()
token内容类似(有audit_ids,但没看到audit_chain_id):
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
{ "token": { "audit_ids": [ "ukdlt-mGQtyl6mnNYGYdsQ" ], "catalog": [ { "endpoints": [ { "id": "4f5f80c9e87140929c90964d95a9fc0d", "interface": "internal", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:5000/v3" }, { "id": "5bb4f46e9dbf4575b7e8f1244272c178", "interface": "public", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:5000/v3" }, { "id": "8717669ebc0b4299af2750ab282f4d5d", "interface": "admin", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:35357/v3" } ], "id": "307e9642e0cf44ca9c7798796cdf9290", "name": "keystone", "type": "identity" }, { "endpoints": [ { "id": "64666fafeaea4f87994d8c9ef67f82e2", "interface": "internal", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:9292" }, { "id": "77efcd4ca5f1401dabc5b7ad32e50351", "interface": "admin", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:9292" }, { "id": "9d40878e853c4637a66732da1aa51c89", "interface": "public", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:9292" } ], "id": "3456cac60558400191a82ed400e8f196", "name": "glance", "type": "image" }, { "endpoints": [ { "id": "274b02e30d1746d0950051242188874d", "interface": "public", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:9696" }, { "id": "38ff891fd7684163a6eb108ba60ff28a", "interface": "internal", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:9696" }, { "id": "3aca19ca07ea4bafadfc789da777179a", "interface": "admin", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:9696" } ], "id": "74941df6655d4338a32cd9a40422f676", "name": "neutron", "type": "network" }, { "endpoints": [ { "id": "41bfe2c2c1ae40d196dd81049cea2b98", "interface": "public", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:8776/v2/1829813dc6e94d95aec1d1ace95b587b" }, { "id": 
"61c046be865045f19304ba1d5e160af8", "interface": "internal", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:8776/v2/1829813dc6e94d95aec1d1ace95b587b" }, { "id": "9556e0e7d18c4a518e1239a0cb3a53ee", "interface": "admin", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:8776/v2/1829813dc6e94d95aec1d1ace95b587b" } ], "id": "82c4abe7015c4f4d8609bdaff6906187", "name": "cinderv2", "type": "volumev2" }, { "endpoints": [ { "id": "4cf84e5ba40c4433a52abfd80fcce279", "interface": "public", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:8774/v2.1/1829813dc6e94d95aec1d1ace95b587b" }, { "id": "e6f0b85808214b6594825bdf5f7453df", "interface": "admin", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:8774/v2.1/1829813dc6e94d95aec1d1ace95b587b" }, { "id": "f831a5a1035d44bfa4d228e49ddbbc33", "interface": "internal", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:8774/v2.1/1829813dc6e94d95aec1d1ace95b587b" } ], "id": "b6f480b4504143b4bce292bbb498a68b", "name": "nova", "type": "compute" }, { "endpoints": [ { "id": "42118b37e7854851ae7165e585548db2", "interface": "internal", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:8776/v1/1829813dc6e94d95aec1d1ace95b587b" }, { "id": "c11ad2ef852742cdbb69c4bd6d70de33", "interface": "admin", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:8776/v1/1829813dc6e94d95aec1d1ace95b587b" }, { "id": "ef88e5228ca7487fa4b7eace8b366472", "interface": "public", "region": "RegionOne", "region_id": "RegionOne", "url": "http://vs-controller:8776/v1/1829813dc6e94d95aec1d1ace95b587b" } ], "id": "d55298be79634b5887fd831ec5989e97", "name": "cinder", "type": "volume" } ], "expires_at": "2017-08-02T09:07:38.000000Z", "issued_at": "2017-08-02T08:07:38.000000Z", "methods": [ "password" ], "project": { "domain": { "id": "a77fc52dc6664cca92e4a18e49dc0375", "name": 
"default" }, "id": "1829813dc6e94d95aec1d1ace95b587b", "name": "admin" }, "roles": [ { "id": "db9c8afebaaf4870ade55402a8df67ed", "name": "admin" } ], "user": { "domain": { "id": "a77fc52dc6664cca92e4a18e49dc0375", "name": "default" }, "id": "212a66c41a334b74bffea2cebe14d400", "name": "admin" } } } |
数据库更新记录:
再以更新用户为例:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
@domains_configured
@exception_translated('user')
def update_user(self, user_id, user_ref, initiator=None):
    """Update a user and emit the related notifications.

    Works on a copy of ``user_ref``: cleans ``name`` / ``enabled``,
    checks a ``domain_id`` change, rejects a changed ``id``, invalidates
    the ``get_user`` / ``get_user_by_name`` entries for the old user and
    passes the update to the driver. If the user gets disabled or a
    password is supplied, a token-invalidation notification is emitted.

    :param user_id: public ID of the user to update
    :param user_ref: dict of attributes to change (copied, not mutated)
    :param initiator: passed through to ``notifications.Audit.updated``
    :returns: result of ``self._set_domain_id_and_mapping`` for the
              updated user
    :raises exception.ValidationError: if ``user_ref['id']`` differs
                                       from ``user_id``
    """
    old_user_ref = self.get_user(user_id)
    user = user_ref.copy()
    if 'name' in user:
        user['name'] = clean.user_name(user['name'])
    if 'enabled' in user:
        user['enabled'] = clean.user_enabled(user['enabled'])
    if 'domain_id' in user:
        self._check_update_of_domain_id(user['domain_id'],
                                        old_user_ref['domain_id'])
        self.resource_api.get_domain(user['domain_id'])
    if 'id' in user:
        if user_id != user['id']:
            raise exception.ValidationError(_('Cannot change user ID'))
        # Since any ID in the user dict is now irrelevant, remove it so
        # that the driver layer won't be confused by the fact that this
        # is the public ID, not the local ID.
        user.pop('id')

    domain_id, driver, entity_id = (
        self._get_domain_driver_and_entity_id(user_id))
    user = self._clear_domain_id_if_domain_unaware(driver, user)
    # Invalidate the entries for the old user before the driver update.
    self.get_user.invalidate(self, old_user_ref['id'])
    self.get_user_by_name.invalidate(self, old_user_ref['name'],
                                     old_user_ref['domain_id'])

    ref = driver.update_user(entity_id, user)

    notifications.Audit.updated(self._USER, user_id, initiator)

    # True only when the update explicitly sets enabled to False and
    # that differs from the previous value.
    enabled_change = ((user.get('enabled') is False) and
                      user['enabled'] != old_user_ref.get('enabled'))
    if enabled_change or user.get('password') is not None:
        # Disabling the user or changing the password emits a revoke
        # event.
        self.emit_invalidate_user_token_persistence(user_id)

    return self._set_domain_id_and_mapping(
        ref, domain_id, driver, mapping.EntityType.USER)
1 2 3 4 5 6 7 8 9 10 11 12 |
def emit_invalidate_user_token_persistence(self, user_id):
    """Emit a notification to the callback system to revoke user tokens.

    This method and the associated callback listener remove the need for
    making a direct call to another manager to delete and revoke tokens.

    :param user_id: user identifier
    :type user_id: string
    """
    # _register_listeners registers a callback for this internal event
    # and keyword.
    notifications.Audit.internal(
        notifications.INVALIDATE_USER_TOKEN_PERSISTENCE, user_id
    )
之后执行注册好的回调:
1 2 3 |
def _user_callback(self, service, resource_type, operation, payload): self.revoke_by_user(payload['resource_info']) |
1 2 |
def revoke_by_user(self, user_id):
    """Record a revocation event built from ``user_id``.

    :param user_id: user ID passed to ``revoke_model.RevokeEvent``
    :returns: whatever ``self.revoke`` returns
    """
    event = revoke_model.RevokeEvent(user_id=user_id)
    return self.revoke(event)
之后就与上面的revoke token流程类似了,数据库更新记录:
补充知识
为啥要有revoke流程?(以下内容为个人猜测,未经验证)
之前使用uuid格式的token时,token都保存在token表里,表中有个valid字段标识token是否可用;删除或更新用户导致token失效时,把这个字段改成0即可表示已失效。但fernet格式的token没有数据库记录,无法这样操作,只能把revoke相关记录保存到单独的表中,也就是revocation_event表。
这只是revocation event相关流程的一部分,所以本文的标题加了part,其他部分还没研究,等后面有机会看明白了再补充。