From 8977164eee956eed8f7b287e158ebddb4d2621dd Mon Sep 17 00:00:00 2001
From: Kevin Tang <zhikait@codeaurora.org>
Date: Sat, 19 Jul 2014 18:37:29 -0700
Subject: [PATCH 1/2] fixing the SSR recovery race condition

There is a race condition where when startFix is
called right at the time when modem or griffon
subsystem is down, GPS HAL doesn't get the correct
error code, and therefore the right handling.

Mapped ENGINE_DOWN to ENGINE_OFFLINE, as they are
the same; and modified loc_eng_start_handler to
update the state upon the right error code.

There is a one problem though. General failure is
also handled as SSR. This is because of an unhandled
race condition in the kernel, so the error code
returned and propagated is not deterministic enough
for us to tell if this is SSR. Until that fix is in
place, we might have to treat general failure as SSR
although the side effect should be none. Only
semantically incorrect.

Change-Id: If93823f08428275da171bb22d73a06e38365585b
CR-Fixed: 692085
---
 core/gps_extended_c.h                | 5 +++--
 loc_api/libloc_api_50001/loc_eng.cpp | 4 +++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/core/gps_extended_c.h b/core/gps_extended_c.h
index d4a7dbea..04a78a31 100644
--- a/core/gps_extended_c.h
+++ b/core/gps_extended_c.h
@@ -306,8 +306,9 @@ enum loc_api_adapter_err {
     LOC_API_ADAPTER_ERR_TIMEOUT             = 8,
     LOC_API_ADAPTER_ERR_SERVICE_NOT_PRESENT = 9,
 
-    LOC_API_ADAPTER_ERR_ENGINE_DOWN         = 100,
-    LOC_API_ADAPTER_ERR_FAILURE,
+    /* equating engine down to phone offline, as they are the same errror */
+    LOC_API_ADAPTER_ERR_ENGINE_DOWN         = LOC_API_ADAPTER_ERR_PHONE_OFFLINE,
+    LOC_API_ADAPTER_ERR_FAILURE             = 101,
     LOC_API_ADAPTER_ERR_UNKNOWN
 };
 
diff --git a/loc_api/libloc_api_50001/loc_eng.cpp b/loc_api/libloc_api_50001/loc_eng.cpp
index 320a61d8..b23df770 100644
--- a/loc_api/libloc_api_50001/loc_eng.cpp
+++ b/loc_api/libloc_api_50001/loc_eng.cpp
@@ -1718,7 +1718,9 @@ static int loc_eng_start_handler(loc_eng_data_s_type &loc_eng_data)
        ret_val = loc_eng_data.adapter->startFix();
 
        if (ret_val == LOC_API_ADAPTER_ERR_SUCCESS ||
-           ret_val == LOC_API_ADAPTER_ERR_ENGINE_DOWN)
+           ret_val == LOC_API_ADAPTER_ERR_ENGINE_DOWN ||
+           ret_val == LOC_API_ADAPTER_ERR_PHONE_OFFLINE ||
+           ret_val == LOC_API_ADAPTER_ERR_GENERAL_FAILURE)
        {
            loc_eng_data.adapter->setInSession(TRUE);
            loc_inform_gps_status(loc_eng_data, GPS_STATUS_SESSION_BEGIN);

From 33e5fd9015841f10711a920b0840ccc2e62b812a Mon Sep 17 00:00:00 2001
From: Kevin Tang <zhikait@codeaurora.org>
Date: Sun, 20 Jul 2014 18:55:44 -0700
Subject: [PATCH 2/2] loc timer util fix to handle the race condition

loc timer util stop() routine may have race condition
with the timer thread, when timer expires at the same
time stop() routine tries to lock mutex.  The race
condition can go 2 ways:
* timer thread expires, unlocks mutex, context switch,
stop() thread acquires lock, context switch, timer
thread destroys mutex.  Destroy will fail, resulting
mutex leak.
* timer thread expires, unlocks mutex, destroys mutex,
stop() acqures lock, signal, and releases lock. Would
be super rare conditions though.

Fix is that we give 5 seconds for stop() thread to
give up the lock when destroy. After that the timer
thread will release the mutex and go on destroy.
Meanwhile the stop() thread would check the lock
return to move on with signal and unlock.

Change-Id: Iff9e34d08a1faf0828049de2fede2e7a5d15b161
CRs-Fixed: 699856
---
 utils/loc_timer.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/utils/loc_timer.c b/utils/loc_timer.c
index 1e4008eb..2beca5fa 100644
--- a/utils/loc_timer.c
+++ b/utils/loc_timer.c
@@ -101,12 +101,28 @@ static void *timer_thread(void *thread_data)
         break;
     }
 
-    pthread_mutex_destroy(&t->timer_mutex);
-    pthread_cond_destroy(&t->timer_cond);
-
     if(ETIMEDOUT == ret)
         t->callback_func(t->user_data, ret);
 
+    // A (should be rare) race condition is that, when the loc_time_stop is called
+    // and acquired mutex, we reach here.  pthread_mutex_destroy will fail with
+    // error code EBUSY.  We give it 6 tries in 5 seconds.  Should be eanough time
+    // for loc_timer_stop to complete.  With the 7th try, we also perform unlock
+    // prior to destroy.
+    {
+        int i;
+        for (i = 0; EBUSY == pthread_mutex_destroy(&t->timer_mutex) && i <= 5; i++) {
+            if (i < 5) {
+                sleep(1);
+            } else {
+                // nah, forget it, something is seriously wrong.  Mutex has been
+                // held too long.  Unlock the mutext here.
+                pthread_mutex_unlock(&t->timer_mutex);
+            }
+        }
+    }
+    pthread_cond_destroy(&t->timer_cond);
+
     free(t);
     LOC_LOGD("%s:%d]: Exit\n", __func__, __LINE__);
     return NULL;
@@ -175,8 +191,8 @@ _err:
 void loc_timer_stop(void* handle) {
     timer_data* t = (timer_data*)handle;
 
-    if (NULL != t && (READY == t->state || WAITING == t->state)) {
-        pthread_mutex_lock(&(t->timer_mutex));
+    if (NULL != t && (READY == t->state || WAITING == t->state) &&
+        pthread_mutex_lock(&(t->timer_mutex)) == 0) {
         if (READY == t->state || WAITING == t->state) {
             pthread_cond_signal(&t->timer_cond);
             t->state = ABORT;