request = self._service.projects().locations().nodes().get(name=full_name)
response = request.execute()
- instance_url = '%s:%s' % (response['ipAddress'], response['port'])
- worker_list.append(instance_url)
+ if 'health' in response and response['health'] == 'HEALTHY':
+ instance_url = '%s:%s' % (response['ipAddress'], response['port'])
+ worker_list.append(instance_url)
return ClusterSpec({self._job_name: worker_list})
tpu_map = {
'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
'ipAddress': '10.1.2.3',
- 'port': '8470'
+ 'port': '8470',
+ 'health': 'HEALTHY'
}
}
tpu_map = {
'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
'ipAddress': '10.1.2.3',
- 'port': '8470'
+ 'port': '8470',
+ 'health': 'HEALTHY'
}
}
tpu_map = {
'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
'ipAddress': '10.1.2.3',
- 'port': '8470'
+ 'port': '8470',
+ 'health': 'HEALTHY'
},
'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': {
'ipAddress': '10.4.5.6',
- 'port': '8470'
+ 'port': '8470',
+ 'health': 'HEALTHY'
}
}
"""
self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
+ def testHealthyTpuNodeRetrieval(self):
+   """Checks that only a node reporting 'HEALTHY' lands in the cluster spec.
+
+   The mock service serves three nodes: one marked 'HEALTHY', one with no
+   'health' field at all, and one marked 'UNHEALTHY'. The resolver is asked
+   for all three, and the resulting spec is expected to hold a single task
+   pointing at the 'HEALTHY' node's address.
+   """
+   # Only test-tpu-1 (10.1.2.3) is expected to appear as a task.
+   expected_proto = """
+ job {
+ name: 'tpu_worker'
+ tasks {
+ key: 0
+ value: '10.1.2.3:8470'
+ }
+ }
+ """
+
+   mock_nodes = {
+       'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
+           'ipAddress': '10.1.2.3',
+           'port': '8470',
+           'health': 'HEALTHY'
+       },
+       # No 'health' key on this node.
+       'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': {
+           'ipAddress': '10.4.5.6',
+           'port': '8470',
+       },
+       'projects/test-project/locations/us-central1-c/nodes/test-tpu-3': {
+           'ipAddress': '10.7.8.9',
+           'port': '8470',
+           'health': 'UNHEALTHY'
+       }
+   }
+
+   # The healthy node is deliberately not listed first in tpu_names.
+   resolver = TPUClusterResolver(
+       project='test-project',
+       zone='us-central1-c',
+       tpu_names=['test-tpu-2', 'test-tpu-1', 'test-tpu-3'],
+       credentials=None,
+       service=self.mock_service_client(tpu_map=mock_nodes))
+
+   self._verifyClusterSpecEquality(resolver.cluster_spec(), expected_proto)
+
def testGetMasterMultipleEntries(self):
tpu_map = {
'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
'ipAddress': '10.1.2.3',
- 'port': '8470'
+ 'port': '8470',
+ 'health': 'HEALTHY'
},
'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': {
'ipAddress': '10.4.5.6',
- 'port': '8470'
+ 'port': '8470',
+ 'health': 'HEALTHY'
}
}