From 8977164eee956eed8f7b287e158ebddb4d2621dd Mon Sep 17 00:00:00 2001 From: Kevin Tang Date: Sat, 19 Jul 2014 18:37:29 -0700 Subject: [PATCH 1/2] fixing the SSR recovery race condition There is a race condition where when startFix is called right at the time when modem or griffon subsystem is down, GPS HAL doesn't get the correct error code, and therefore the right handling. Mapped ENGINE_DOWN to ENGINE_OFFLINE, as they are the same; and modified loc_eng_start_handler to update the state upon the right error code. There is a one problem though. General failure is also handled as SSR. This is because of an unhandled race condition in the kernel, so the error code returned and propagated is not deterministic enough for us to tell if this is SSR. Until that fix is in place, we might have to treat general failure as SSR although the side effect should be none. Only semantically incorrect. Change-Id: If93823f08428275da171bb22d73a06e38365585b CR-Fixed: 692085 --- core/gps_extended_c.h | 5 +++-- loc_api/libloc_api_50001/loc_eng.cpp | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/core/gps_extended_c.h b/core/gps_extended_c.h index d4a7dbea..04a78a31 100644 --- a/core/gps_extended_c.h +++ b/core/gps_extended_c.h @@ -306,8 +306,9 @@ enum loc_api_adapter_err { LOC_API_ADAPTER_ERR_TIMEOUT = 8, LOC_API_ADAPTER_ERR_SERVICE_NOT_PRESENT = 9, - LOC_API_ADAPTER_ERR_ENGINE_DOWN = 100, - LOC_API_ADAPTER_ERR_FAILURE, + /* equating engine down to phone offline, as they are the same errror */ + LOC_API_ADAPTER_ERR_ENGINE_DOWN = LOC_API_ADAPTER_ERR_PHONE_OFFLINE, + LOC_API_ADAPTER_ERR_FAILURE = 101, LOC_API_ADAPTER_ERR_UNKNOWN }; diff --git a/loc_api/libloc_api_50001/loc_eng.cpp b/loc_api/libloc_api_50001/loc_eng.cpp index 320a61d8..b23df770 100644 --- a/loc_api/libloc_api_50001/loc_eng.cpp +++ b/loc_api/libloc_api_50001/loc_eng.cpp @@ -1718,7 +1718,9 @@ static int loc_eng_start_handler(loc_eng_data_s_type &loc_eng_data) ret_val = loc_eng_data.adapter->startFix(); if (ret_val == LOC_API_ADAPTER_ERR_SUCCESS || - ret_val == LOC_API_ADAPTER_ERR_ENGINE_DOWN) + ret_val == LOC_API_ADAPTER_ERR_ENGINE_DOWN || + ret_val == LOC_API_ADAPTER_ERR_PHONE_OFFLINE || + ret_val == LOC_API_ADAPTER_ERR_GENERAL_FAILURE) { loc_eng_data.adapter->setInSession(TRUE); loc_inform_gps_status(loc_eng_data, GPS_STATUS_SESSION_BEGIN); From 33e5fd9015841f10711a920b0840ccc2e62b812a Mon Sep 17 00:00:00 2001 From: Kevin Tang Date: Sun, 20 Jul 2014 18:55:44 -0700 Subject: [PATCH 2/2] loc timer util fix to handle the race condition loc timer util stop() routine may have race condition with the timer thread, when timer expires at the same time stop() routine tries to lock mutex. The race condition can go 2 ways: * timer thread expires, unlocks mutex, context switch, stop() thread acquires lock, context switch, timer thread destroys mutex. Destroy will fail, resulting mutex leak. * timer thread expires, unlocks mutex, destroys mutex, stop() acqures lock, signal, and releases lock. Would be super rare conditions though. Fix is that we give 5 seconds for stop() thread to give up the lock when destroy. After that the timer thread will release the mutex and go on destroy. Meanwhile the stop() thread would check the lock return to move on with signal and unlock. Change-Id: Iff9e34d08a1faf0828049de2fede2e7a5d15b161 CRs-Fixed: 699856 --- utils/loc_timer.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/utils/loc_timer.c b/utils/loc_timer.c index 1e4008eb..2beca5fa 100644 --- a/utils/loc_timer.c +++ b/utils/loc_timer.c @@ -101,12 +101,28 @@ static void *timer_thread(void *thread_data) break; } - pthread_mutex_destroy(&t->timer_mutex); - pthread_cond_destroy(&t->timer_cond); - if(ETIMEDOUT == ret) t->callback_func(t->user_data, ret); + // A (should be rare) race condition is that, when the loc_time_stop is called + // and acquired mutex, we reach here. pthread_mutex_destroy will fail with + // error code EBUSY. We give it 6 tries in 5 seconds. Should be eanough time + // for loc_timer_stop to complete. With the 7th try, we also perform unlock + // prior to destroy. + { + int i; + for (i = 0; EBUSY == pthread_mutex_destroy(&t->timer_mutex) && i <= 5; i++) { + if (i < 5) { + sleep(1); + } else { + // nah, forget it, something is seriously wrong. Mutex has been + // held too long. Unlock the mutext here. + pthread_mutex_unlock(&t->timer_mutex); + } + } + } + pthread_cond_destroy(&t->timer_cond); + free(t); LOC_LOGD("%s:%d]: Exit\n", __func__, __LINE__); return NULL; @@ -175,8 +191,8 @@ _err: void loc_timer_stop(void* handle) { timer_data* t = (timer_data*)handle; - if (NULL != t && (READY == t->state || WAITING == t->state)) { - pthread_mutex_lock(&(t->timer_mutex)); + if (NULL != t && (READY == t->state || WAITING == t->state) && + pthread_mutex_lock(&(t->timer_mutex)) == 0) { if (READY == t->state || WAITING == t->state) { pthread_cond_signal(&t->timer_cond); t->state = ABORT;