From 8284799abe335439d7da25c68c51c927d8f73e12 Mon Sep 17 00:00:00 2001 From: ehouse Date: Tue, 28 Jul 2009 05:15:26 +0000 Subject: [PATCH] Fix crashes resulting from deleting cref instances in one thread while another wanted to operate on them. The root problem is that you can't dispose of a mutex while somebody's blocking on it. So now the locking mutexes live inside the cref class. When the lock owner realizes the cref needs to die, it sets a flag and it's moved to a recycled list. A thread blocking on the mutex will then get it, but checks the flag and releases it immediately if it's being recycled. (Also improve the http interface a bit.) With these changes I've run 31K (and counting) games against the relay without a crash or deadlock (using sim_real.sh.) The main problem that remains is that sometimes two games using the same cookie wind up with two crefs (and so never connect.) --- xwords4/relay/cref.cpp | 165 +++++++++++++++----- xwords4/relay/cref.h | 16 +- xwords4/relay/crefmgr.cpp | 321 +++++++++++++++----------------------- xwords4/relay/crefmgr.h | 47 ++++-- xwords4/relay/ctrl.cpp | 8 +- xwords4/relay/http.cpp | 39 +++-- xwords4/relay/mlock.h | 5 +- 7 files changed, 322 insertions(+), 279 deletions(-) diff --git a/xwords4/relay/cref.cpp b/xwords4/relay/cref.cpp index c3abe94d2..15dee0773 100644 --- a/xwords4/relay/cref.cpp +++ b/xwords4/relay/cref.cpp @@ -26,6 +26,9 @@ #include #include #include +#include +#include +#include #include "cref.h" #include "xwrelay.h" @@ -71,23 +74,36 @@ SocketsIterator::Next() * CookieRef class *****************************************************************************/ -CookieRef::CookieRef( const char* cookie, const char* connName, CookieID id ) - : m_cookie(cookie==NULL?"":cookie) - , m_connName(connName) - , m_cookieID(id) - , m_totalSent(0) - , m_curState(XWS_INITED) - , m_nextState(XWS_INITED) - , m_eventQueue() - , m_nextHostID(HOST_ID_SERVER) - , m_nPlayersTotal(0) - , m_nPlayersHere(0) +#define ASSERT_LOCKED() \ + assert( m_locking_thread == pthread_self() ) + +void +CookieRef::ReInit( const char* cookie, const char* connName, CookieID id ) { + m_cookie = cookie==NULL?"":cookie; + m_connName = connName; + m_cookieID = id; + m_totalSent = 0; + m_curState = XWS_INITED; + m_nextState = XWS_INITED; + m_nextHostID = HOST_ID_SERVER; + m_nPlayersTotal = 0; + m_nPlayersHere = 0; + m_locking_thread = NULL; + RelayConfigs::GetConfigs()->GetValueFor( "HEARTBEAT", &m_heatbeat ); - logf( XW_LOGINFO, "creating cref for cookie %s, connName %s", + logf( XW_LOGINFO, "initing cref for cookie %s, connName %s", m_cookie.c_str(), m_connName.c_str() ); } + +CookieRef::CookieRef( const char* cookie, const char* connName, CookieID id ) + : m_locking_thread(NULL) +{ + pthread_mutex_init( &m_mutex, NULL ); + ReInit( cookie, connName, id ); +} + CookieRef::~CookieRef() { cancelAllConnectedTimer(); @@ -95,6 +111,7 @@ CookieRef::~CookieRef() /* get rid of any sockets still contained */ XWThreadPool* tPool = XWThreadPool::GetTPool(); + ASSERT_LOCKED(); for ( ; ; ) { map::iterator iter = m_sockets.begin(); @@ -107,10 +124,51 @@ CookieRef::~CookieRef() m_sockets.erase( iter ); } - logf( XW_LOGINFO, "CookieRef for %d being deleted; sent %d bytes", + logf( XW_LOGINFO, + "CookieRef for %d being deleted; sent %d bytes over lifetime", m_cookieID, m_totalSent ); } /* ~CookieRef */ +void +CookieRef::Clear(void) +{ + m_cookie = ""; + m_connName = ""; + m_cookieID = 0; + m_eventQueue.clear(); +} + +bool +CookieRef::Lock( void ) +{ + bool success = true; + + pthread_mutex_lock( &m_mutex ); + + /* We get here possibly after having been blocked on the mutex for a + while. This cref may no longer be live. If it's not, unlock and + return. */ + + assert( m_locking_thread == 0 ); + m_locking_thread = pthread_self(); + + if ( notInUse() ) { + logf( XW_LOGINFO, "%s: not locking %p because not in use", __func__, this ); + success = false; + m_locking_thread = NULL; + pthread_mutex_unlock( &m_mutex ); + } + + return success; +} /* CookieRef::Lock */ + +void +CookieRef::Unlock() { + assert( m_locking_thread == pthread_self() ); + m_locking_thread = NULL; + pthread_mutex_unlock( &m_mutex ); +} + void CookieRef::_Connect( int socket, HostID hid, int nPlayersH, int nPlayersT ) { @@ -166,6 +224,7 @@ int CookieRef::SocketForHost( HostID dest ) { int socket; + ASSERT_LOCKED(); map::iterator iter = m_sockets.find( dest ); if ( iter == m_sockets.end() ) { socket = -1; @@ -235,6 +294,7 @@ CookieRef::removeSocket( int socket ) int count; { /* RWWriteLock rwl( &m_sockets_rwlock ); */ + ASSERT_LOCKED(); count = m_sockets.size(); assert( count > 0 ); @@ -256,9 +316,21 @@ CookieRef::removeSocket( int socket ) bool CookieRef::HasSocket( int socket ) +{ + bool result = Lock(); + if ( result ) { + result = HasSocket_locked( socket ); + Unlock(); + } + return result; +} + +bool +CookieRef::HasSocket_locked( int socket ) { bool found = false; + ASSERT_LOCKED(); map::iterator iter = m_sockets.begin(); while ( iter != m_sockets.end() ) { if ( iter->second.m_socket == socket ) { @@ -267,8 +339,9 @@ CookieRef::HasSocket( int socket ) } ++iter; } + return found; -} /* HasSocket */ +} /* HasSocket_locked */ #ifdef RELAY_HEARTBEAT void @@ -283,7 +356,7 @@ void CookieRef::_CheckHeartbeats( time_t now ) { logf( XW_LOGINFO, "CookieRef::_CheckHeartbeats" ); - + ASSERT_LOCKED(); map::iterator iter = m_sockets.begin(); while ( iter != m_sockets.end() ) { time_t last = iter->second.m_lastHeartbeat; @@ -505,26 +578,17 @@ CookieRef::send_with_length( int socket, unsigned char* buf, int bufLen, bool cascade ) { bool failed = false; - { - SocketWriteLock slock( socket ); - if ( slock.socketFound() ) { - if ( send_with_length_unsafe( socket, buf, bufLen ) ) { - RecordSent( bufLen, socket ); - } else { - failed = true; - /* ok that the slock above is still in scope */ - /* Can't call killSocket. It will deadlock (try to lock a mutex - we already hold) if what we're sending about is that we're - killing a socket */ - /* killSocket( socket, "couldn't send" ); */ - } - } + if ( send_with_length_unsafe( socket, buf, bufLen ) ) { + RecordSent( bufLen, socket ); + } else { + failed = true; } + if ( failed && cascade ) { _Remove( socket ); XWThreadPool::GetTPool()->CloseSocket( socket ); } -} +} /* send_with_length */ static void putNetShort( unsigned char** bufpp, unsigned short s ) @@ -566,6 +630,7 @@ void CookieRef::reducePlayerCounts( int socket ) { logf( XW_LOGVERBOSE1, "reducePlayerCounts on socket %d", socket ); + ASSERT_LOCKED(); map::iterator iter = m_sockets.begin(); while ( iter != m_sockets.end() ) { @@ -646,6 +711,7 @@ CookieRef::sendResponse( const CRefEvent* evt, bool initial ) assert( id != HOST_ID_NONE ); logf( XW_LOGINFO, "remembering pair: hostid=%x, socket=%d", id, socket ); HostRec hr(socket, nPlayersH, nPlayersT); + ASSERT_LOCKED(); m_sockets.insert( pair(id,hr) ); /* Now send the response */ @@ -722,7 +788,7 @@ CookieRef::notifyOthers( int socket, XWRelayMsg msg, XWREASON why ) assert( socket != 0 ); /* RWReadLock ml( &m_sockets_rwlock ); */ - + ASSERT_LOCKED(); map::iterator iter = m_sockets.begin(); while ( iter != m_sockets.end() ) { int other = iter->second.m_socket; @@ -750,7 +816,7 @@ CookieRef::sendAllHere( bool includeName ) memcpy( bufp, connName, len ); bufp += len; } - + ASSERT_LOCKED(); map::iterator iter = m_sockets.begin(); while ( iter != m_sockets.end() ) { send_with_length( iter->second.m_socket, buf, bufp-buf, @@ -764,6 +830,7 @@ CookieRef::disconnectSockets( int socket, XWREASON why ) { if ( socket == 0 ) { /* RWReadLock ml( &m_sockets_rwlock ); */ + ASSERT_LOCKED(); map::iterator iter = m_sockets.begin(); while ( iter != m_sockets.end() ) { assert( iter->second.m_socket != 0 ); @@ -783,7 +850,7 @@ CookieRef::noteHeartbeat( const CRefEvent* evt ) HostID id = evt->u.heart.id; /* RWWriteLock rwl( &m_sockets_rwlock ); */ - + ASSERT_LOCKED(); map::iterator iter = m_sockets.find(id); if ( iter == m_sockets.end() ) { logf( XW_LOGERROR, "no socket for HostID %x", id ); @@ -792,6 +859,7 @@ CookieRef::noteHeartbeat( const CRefEvent* evt ) /* PENDING If the message came on an unexpected socket, kill the connection. An attack is the most likely explanation. */ assert( iter->second.m_socket == socket ); + /* if see this again recover from it */ logf( XW_LOGVERBOSE1, "upping m_lastHeartbeat from %d to %d", iter->second.m_lastHeartbeat, uptime() ); @@ -869,6 +937,7 @@ CookieRef::_PrintCookieInfo( string& out ) /* n messages */ /* open since when */ + ASSERT_LOCKED(); snprintf( buf, sizeof(buf), "Hosts connected=%d; cur time = %ld\n", m_sockets.size(), uptime() ); out += buf; @@ -884,13 +953,29 @@ CookieRef::_PrintCookieInfo( string& out ) } /* PrintCookieInfo */ void -CookieRef::_FormatSockets( string& out ) +CookieRef::_FormatHostInfo( string* hostIds, string* addrs ) { - map::iterator iter = m_sockets.begin(); - while ( iter != m_sockets.end() ) { - char buf[8]; - snprintf( buf, sizeof(buf), "%d ", iter->first ); - out += buf; - ++iter; + logf( XW_LOGINFO, "%s", __func__ ); + + ASSERT_LOCKED(); + map::iterator iter; + for ( iter = m_sockets.begin(); iter != m_sockets.end(); ++iter ) { + + if ( !!hostIds ) { + char buf[8]; + snprintf( buf, sizeof(buf), "%d ", iter->first ); + *hostIds += buf; + } + + if ( !!addrs ) { + int s = iter->second.m_socket; + struct sockaddr_in name; + socklen_t siz = sizeof(name); + if ( 0 == getpeername( s, (struct sockaddr*)&name, &siz) ) { + char buf[32] = {0}; + snprintf( buf, sizeof(buf), "%s ", inet_ntoa(name.sin_addr) ); + *addrs += buf; + } + } } } diff --git a/xwords4/relay/cref.h b/xwords4/relay/cref.h index 900fb8e6e..c89b13cef 100644 --- a/xwords4/relay/cref.h +++ b/xwords4/relay/cref.h @@ -60,8 +60,14 @@ class CookieRef { friend class CookieMapIterator; CookieRef( const char* cookie, const char* connName, CookieID id ); + void ReInit( const char* cookie, const char* connName, CookieID id ); ~CookieRef(); + void Clear(void); /* make clear it's unused */ + + bool Lock( void ); + void Unlock( void ); + /* Within this cookie, remember that this hostID and socket go together. If the hostID is HOST_ID_SERVER, it's the server. */ CookieID GetCookieID() { return m_cookieID; } @@ -73,6 +79,7 @@ class CookieRef { bool HostKnown( HostID host ) { return -1 != SocketForHost( host ); } int CountSockets() { return m_sockets.size(); } bool HasSocket( int socket ); + bool HasSocket_locked( int socket ); const char* Cookie() { return m_cookie.c_str(); } const char* ConnName() { return m_connName.c_str(); } @@ -85,7 +92,7 @@ class CookieRef { /* for console */ void _PrintCookieInfo( string& out ); void PrintSocketInfo( string& out, int socket ); - void _FormatSockets( string& out ); + void _FormatHostInfo( string* hostIds, string* addrs ); static CookieMapIterator GetCookieIterator(); @@ -188,6 +195,9 @@ class CookieRef { void sendAllHere( bool includeName ); HostID nextHostID() { return ++m_nextHostID; } + + bool notInUse(void) { return m_cookieID == 0; } + /* timer callback */ static void s_checkAllConnected( void* closure ); @@ -213,6 +223,10 @@ class CookieRef { HostID m_nextHostID; int m_nPlayersTotal; int m_nPlayersHere; + + pthread_mutex_t m_mutex; + + pthread_t m_locking_thread; /* for debugging only */ }; /* CookieRef */ #endif diff --git a/xwords4/relay/crefmgr.cpp b/xwords4/relay/crefmgr.cpp index 074b5f6b6..513a00cdd 100644 --- a/xwords4/relay/crefmgr.cpp +++ b/xwords4/relay/crefmgr.cpp @@ -58,8 +58,8 @@ CRefMgr::CRefMgr() : m_nextCID(0) { /* should be using pthread_once() here */ - pthread_mutex_init( &m_guard, NULL ); pthread_mutex_init( &m_SocketStuffMutex, NULL ); + pthread_mutex_init( &m_freeList_mutex, NULL ); pthread_rwlock_init( &m_cookieMapRWLock, NULL ); } @@ -67,7 +67,7 @@ CRefMgr::~CRefMgr() { assert( this == s_instance ); - pthread_mutex_destroy( &m_guard ); + pthread_mutex_destroy( &m_freeList_mutex ); pthread_rwlock_destroy( &m_cookieMapRWLock ); SocketMap::iterator iter; @@ -84,17 +84,21 @@ CRefMgr::CloseAll() { /* Get every cref instance, shut it down */ - RWWriteLock rwl( &m_cookieMapRWLock ); - CookieMap::iterator iter = m_cookieMap.begin(); - while ( iter != m_cookieMap.end() ) { - CookieRef* cref = iter->second; + for ( ; ; ) { + CookieRef* cref = NULL; { - SafeCref scr( cref ); - scr.Shutdown(); + RWWriteLock rwl( &m_cookieMapRWLock ); + CookieMap::iterator iter = m_cookieMap.begin(); + if ( iter == m_cookieMap.end() ) { + break; + } + cref = iter->second; + { + SafeCref scr( cref ); /* cref */ + scr.Shutdown(); + } } - ++iter; } - } /* CloseAll */ CookieRef* @@ -175,20 +179,32 @@ CRefMgr::cookieIDForConnName( const char* connName ) return cid; } /* cookieIDForConnName */ +void +CRefMgr::addToFreeList( CookieRef* cref ) +{ + MutexLock ml( &m_freeList_mutex ); + m_freeList.push_back( cref ); +} + +CookieRef* +CRefMgr::getFromFreeList( void ) +{ + CookieRef* cref = NULL; + MutexLock ml( &m_freeList_mutex ); + if ( m_freeList.size() > 0 ) { + cref = m_freeList.front(); + m_freeList.pop_front(); + } + return cref; +} + + CookieRef* CRefMgr::getMakeCookieRef_locked( const char* cORn, bool isCookie, HostID hid, int socket, int nPlayersH, int nPlayersT ) { CookieRef* cref; -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "tlm %p", &m_guard ); -#endif - pthread_mutex_lock( &m_guard ); -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "slm %p", &m_guard ); -#endif - /* We have a cookie from a new connection. This may be the first time it's been seen, or there may be a game currently in the XW_ST_CONNECTING state, or it may be a dupe of a connect packet. So we @@ -216,13 +232,6 @@ CRefMgr::getMakeCookieRef_locked( const char* cORn, bool isCookie, HostID hid, cref = AddNew( cookie, connName, cid ); } - if ( cref == NULL ) { -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "ULM %p", &m_guard ); -#endif - pthread_mutex_unlock( &m_guard ); - } - return cref; } /* getMakeCookieRef_locked */ @@ -254,7 +263,7 @@ CRefMgr::Disassociate( int socket, CookieRef* cref ) MutexLock ml( &m_SocketStuffMutex ); SocketMap::iterator iter = m_SocketStuff.find( socket ); if ( iter == m_SocketStuff.end() ) { - logf( XW_LOGERROR, "can't find cref/threadID pair for socket %d", socket ); + logf( XW_LOGERROR, "can't find SocketStuff for socket %d", socket ); } else { SocketStuff* stuff = iter->second; assert( cref == NULL || stuff->m_cref == cref ); @@ -263,24 +272,33 @@ CRefMgr::Disassociate( int socket, CookieRef* cref ) } } +#if 0 pthread_mutex_t* CRefMgr::GetWriteMutexForSocket( int socket ) { + pthread_mutex_t* mutex = NULL; MutexLock ml( &m_SocketStuffMutex ); SocketMap::iterator iter = m_SocketStuff.find( socket ); if ( iter != m_SocketStuff.end() ) { SocketStuff* stuff = iter->second; - return &stuff->m_writeMutex; + /* this is dangerous! What if we want to nuke SocketStuff while this + is locked? And shouldn't it be the case that only one thread at a + time can be trying to write to one of a given cref's sockets since + only one thread at a time is handling a cref? */ + mutex = &stuff->m_writeMutex; } logf( XW_LOGERROR, "GetWriteMutexForSocket: not found" ); - return NULL; + return mutex; } /* GetWriteMutexForSocket */ +#endif void CRefMgr::RemoveSocketRefs( int socket ) { - SafeCref scr( socket ); - scr.Remove( socket ); + { + SafeCref scr( socket ); + scr.Remove( socket ); + } Disassociate( socket, NULL ); } @@ -311,123 +329,24 @@ CRefMgr::MakeSocketsIterator() } CookieRef* -CRefMgr::getCookieRef_locked( CookieID cookieID ) +CRefMgr::getCookieRef( CookieID cookieID ) { -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "tlm %p", &m_guard ); -#endif - pthread_mutex_lock( &m_guard ); -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "slm %p", &m_guard ); -#endif - - CookieRef* cref = getCookieRef_impl( cookieID ); - - if ( cref == NULL ) { -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "ULM %p", &m_guard ); -#endif - pthread_mutex_unlock( &m_guard ); - } - - return cref; -} /* getCookieRef_locked */ + return getCookieRef_impl( cookieID ); +} /* getCookieRef */ CookieRef* -CRefMgr::getCookieRef_locked( int socket ) +CRefMgr::getCookieRef( int socket ) { -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "tlm %p", &m_guard ); -#endif - pthread_mutex_lock( &m_guard ); -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "slm %p", &m_guard ); -#endif CookieRef* cref = NULL; - - CookieMap::iterator iter = m_cookieMap.begin(); - while ( iter != m_cookieMap.end() ) { - CookieRef* tmp = iter->second; - if ( tmp->HasSocket( socket ) ) { - cref = tmp; - break; - } - ++iter; - } - - if ( cref == NULL ) { -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "ULM %p", &m_guard ); -#endif - pthread_mutex_unlock( &m_guard ); + MutexLock ml( &m_SocketStuffMutex ); + SocketMap::iterator iter = m_SocketStuff.find( socket ); + if ( iter != m_SocketStuff.end() ) { + SocketStuff* stuff = iter->second; + cref = stuff->m_cref; } return cref; -} /* getCookieRef_locked */ - -bool -CRefMgr::checkCookieRef_locked( CookieRef* cref ) -{ - bool exists = true; - assert( cref != NULL ); - -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "tlm %p", &m_guard ); -#endif - pthread_mutex_lock( &m_guard ); -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "slm %p", &m_guard ); -#endif - - pthread_mutex_t* cref_mutex = m_crefMutexes[cref]; - logf( XW_LOGINFO, "checkCookieRef_locked: cref_mutex=%p", cref_mutex ); - - if ( cref_mutex == NULL ) { -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "ULM %p", &m_guard ); -#endif - pthread_mutex_unlock( &m_guard ); - exists = false; - } - - return exists; -} /* checkCookieRef_locked */ - -bool -CRefMgr::LockCref( CookieRef* cref ) -{ - /* assertion: m_guard is locked */ - - pthread_mutex_t* cref_mutex = m_crefMutexes[cref]; - if ( cref_mutex == NULL ) { - cref_mutex = (pthread_mutex_t*)malloc( sizeof( *cref_mutex ) ); - pthread_mutex_init( cref_mutex, NULL ); - m_crefMutexes[cref] = cref_mutex; - } -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "tlm %p", cref_mutex ); -#endif - pthread_mutex_lock( cref_mutex ); -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "slm %p", cref_mutex ); -#endif - -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "ULM %p", &m_guard ); -#endif - pthread_mutex_unlock( &m_guard ); - return true; -} /* LockCref */ - -void -CRefMgr::UnlockCref( CookieRef* cref ) -{ - pthread_mutex_t* cref_mutex = m_crefMutexes[cref]; -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "ULM %p", cref_mutex ); -#endif - pthread_mutex_unlock( cref_mutex ); -} +} /* getCookieRef */ #ifdef RELAY_HEARTBEAT /* static */ void @@ -444,11 +363,21 @@ CRefMgr::AddNew( const char* cookie, const char* connName, CookieID id ) logf( XW_LOGINFO, "%s( cookie=%s, connName=%s, id=%d", __func__, cookie, connName, id ); CookieRef* exists = getCookieRef_impl( id ); - assert( exists == NULL ); + assert( exists == NULL ); /* failed once */ + + CookieRef* ref = getFromFreeList(); RWWriteLock rwl( &m_cookieMapRWLock ); logf( XW_LOGINFO, "making new cref: %d", id ); - CookieRef* ref = new CookieRef( cookie, connName, id ); + + if ( !!ref ) { + logf( XW_LOGINFO, "using from free list" ); + ref->ReInit( cookie, connName, id ); + } else { + logf( XW_LOGINFO, "calling constructor" ); + ref = new CookieRef( cookie, connName, id ); + } + m_cookieMap.insert( pair(ref->GetCookieID(), ref ) ); logf( XW_LOGINFO, "paired cookie %s/connName %s with id %d", (cookie?cookie:"NULL"), connName, ref->GetCookieID() ); @@ -467,59 +396,52 @@ CRefMgr::AddNew( const char* cookie, const char* connName, CookieID id ) } /* AddNew */ void -CRefMgr::Delete( CookieRef* cref ) +CRefMgr::Recycle( CookieRef* cref ) { + logf( XW_LOGINFO, "%s(cref=%p,cookie=%s)", __func__, cref, cref->Cookie() ); + CookieID id = cref->GetCookieID(); + cref->Clear(); + addToFreeList( cref ); + + cref->Unlock(); + + /* don't grab this lock until after releasing cref's lock; otherwise + deadlock happens. */ RWWriteLock rwl( &m_cookieMapRWLock ); CookieMap::iterator iter = m_cookieMap.begin(); while ( iter != m_cookieMap.end() ) { CookieRef* ref = iter->second; if ( ref == cref ) { - logf( XW_LOGINFO, "erasing cref" ); + logf( XW_LOGINFO, "%s: erasing cref cid %d", __func__, id ); m_cookieMap.erase( iter ); break; } ++iter; } - pthread_mutex_t* cref_mutex = m_crefMutexes[cref]; -#ifdef DEBUG_LOCKS - logf( XW_LOGINFO, "ULM %p", cref_mutex ); -#endif - pthread_mutex_unlock( cref_mutex ); - pthread_mutex_destroy( cref_mutex ); - free( cref_mutex ); - - map::iterator iter2; - iter2 = m_crefMutexes.find(cref); - assert( iter2 != m_crefMutexes.end() ); - m_crefMutexes.erase( iter2 ); - - delete cref; #ifdef RELAY_HEARTBEAT if ( m_cookieMap.size() == 0 ) { TimerMgr::GetTimerMgr()->ClearTimer( heartbeatProc, this ); } #endif - - logf( XW_LOGINFO, "CRefMgr::Delete done" ); -} +} /* CRefMgr::Recycle */ void -CRefMgr::Delete( CookieID id ) +CRefMgr::Recycle( CookieID id ) { - CookieRef* cref = getCookieRef_impl( id ); + CookieRef* cref = getCookieRef( id ); if ( cref != NULL ) { - Delete( cref ); + Recycle( cref ); } } /* Delete */ void -CRefMgr::Delete( const char* connName ) +CRefMgr::Recycle( const char* connName ) { CookieID id = cookieIDForConnName( connName ); - Delete( id ); + Recycle( id ); } /* Delete */ CookieRef* @@ -566,13 +488,14 @@ CRefMgr::checkHeartbeats( time_t now ) /* static */ CookieMapIterator CRefMgr::GetCookieIterator() { - CookieMapIterator iter; + CookieMapIterator iter(&m_cookieMapRWLock); return iter; } -CookieMapIterator::CookieMapIterator() - : _iter( CRefMgr::Get()->m_cookieMap.begin() ) +CookieMapIterator::CookieMapIterator(pthread_rwlock_t* rwlock) + : m_rwl( rwlock ) + ,_iter( CRefMgr::Get()->m_cookieMap.begin() ) { } @@ -594,62 +517,64 @@ CookieMapIterator::Next() SafeCref::SafeCref( const char* cORn, bool isCookie, HostID hid, int socket, int nPlayersH, int nPlayersT ) - : m_cref( NULL ) - , m_mgr( CRefMgr::Get() ) + : m_cref( NULL ) + , m_mgr( CRefMgr::Get() ) + , m_isValid( false ) { CookieRef* cref; cref = m_mgr->getMakeCookieRef_locked( cORn, isCookie, hid, socket, nPlayersH, nPlayersT ); if ( cref != NULL ) { - if ( m_mgr->LockCref( cref ) ) { - m_cref = cref; - } + cref->Lock(); + m_cref = cref; + m_isValid = true; } } -SafeCref::SafeCref( CookieID connID ) - : m_cref( NULL ) - , m_mgr( CRefMgr::Get() ) +SafeCref::SafeCref( CookieID connID, bool failOk ) + : m_cref( NULL ) + , m_mgr( CRefMgr::Get() ) + , m_isValid( false ) { - CookieRef* cref = m_mgr->getCookieRef_locked( connID ); - if ( cref != NULL ) { - if ( m_mgr->LockCref( cref ) ) { - m_cref = cref; - } + CookieRef* cref = m_mgr->getCookieRef( connID ); + if ( cref != NULL ) { /* known cookie? */ + m_locked = cref->Lock(); + m_isValid = m_locked && connID == cref->GetCookieID(); + m_cref = cref; } } SafeCref::SafeCref( int socket ) - : m_cref( NULL ) - , m_mgr( CRefMgr::Get() ) + : m_cref( NULL ) + , m_mgr( CRefMgr::Get() ) + , m_isValid( false ) { - CookieRef* cref = m_mgr->getCookieRef_locked( socket ); - if ( cref != NULL ) { - if ( m_mgr->LockCref( cref ) ) { - m_cref = cref; - } + CookieRef* cref = m_mgr->getCookieRef( socket ); + if ( cref != NULL ) { /* known socket? */ + m_locked = cref->Lock(); + m_isValid = m_locked && cref->HasSocket_locked( socket ); + m_cref = cref; } } SafeCref::SafeCref( CookieRef* cref ) - : m_cref( NULL ) - , m_mgr( CRefMgr::Get() ) + : m_cref( NULL ) + , m_mgr( CRefMgr::Get() ) + , m_isValid( false ) { - if ( m_mgr->checkCookieRef_locked( cref ) ) { - if ( m_mgr->LockCref( cref ) ) { - m_cref = cref; - } - } + m_locked = cref->Lock(); + m_isValid = m_locked; + m_cref = cref; } SafeCref::~SafeCref() { - if ( m_cref != NULL ) { + if ( m_cref != NULL && m_locked ) { if ( m_cref->ShouldDie() ) { - m_mgr->Delete( m_cref ); + m_mgr->Recycle( m_cref ); } else { - m_mgr->UnlockCref( m_cref ); + m_cref->Unlock(); } } } diff --git a/xwords4/relay/crefmgr.h b/xwords4/relay/crefmgr.h index 2afac4aff..0c647b781 100644 --- a/xwords4/relay/crefmgr.h +++ b/xwords4/relay/crefmgr.h @@ -21,7 +21,10 @@ #ifndef _CREFMGR_H_ #define _CREFMGR_H_ +#include + #include "cref.h" +#include "mlock.h" typedef map CookieMap; class CookieMapIterator; @@ -59,9 +62,9 @@ class CRefMgr { CookieMapIterator GetCookieIterator(); /* PENDING. These need to go through SafeCref */ - void Delete( CookieID id ); - void Delete( CookieRef* cref ); - void Delete( const char* connName ); + void Recycle( CookieID id ); + void Recycle( CookieRef* cref ); + void Recycle( const char* connName ); CookieID CookieIdForName( const char* name ); /* For use from ctrl only!!!! */ @@ -81,11 +84,24 @@ class CRefMgr { private: friend class SafeCref; + + /* We'll recycle cref instances rather than free and new them. This + solves, inelegantly, a problem where I want to free an instance (while + holding its mutex) but can't know if other threads are trying to obtain + that mutex. It's illegal to destroy a mutex somebody's trying to lock. + So we recycle, let the other thread succeed in locking but then quickly + discover that the cref it got isn't what it wants. See the SafeCref + class. */ + list m_freeList; + pthread_mutex_t m_freeList_mutex; + void addToFreeList( CookieRef* cref ); + CookieRef* getFromFreeList( void ); + CookieRef* getMakeCookieRef_locked( const char* cORn, bool isCookie, HostID hid, int socket, int nPlayersH, int nPlayersT ); - CookieRef* getCookieRef_locked( CookieID cookieID ); - CookieRef* getCookieRef_locked( int socket ); + CookieRef* getCookieRef( CookieID cookieID ); + CookieRef* getCookieRef( int socket ); bool checkCookieRef_locked( CookieRef* cref ); CookieRef* getCookieRef_impl( CookieID cookieID ); CookieRef* AddNew( const char* cookie, const char* connName, CookieID id ); @@ -100,12 +116,9 @@ class CRefMgr { CookieID m_nextCID; - bool LockCref( CookieRef* cref ); + void LockCref( CookieRef* cref ); void UnlockCref( CookieRef* cref ); - pthread_mutex_t m_guard; - map m_crefMutexes; - pthread_rwlock_t m_cookieMapRWLock; CookieMap m_cookieMap; @@ -124,7 +137,7 @@ class SafeCref { public: SafeCref( const char* cookieOrConnName, bool cookie, HostID hid, int socket, int nPlayersH, int nPlayersT ); - SafeCref( CookieID cid ); + SafeCref( CookieID cid, bool failOk = false ); SafeCref( int socket ); SafeCref( CookieRef* cref ); ~SafeCref(); @@ -249,29 +262,29 @@ class SafeCref { } } - const char* GetHostsConnected( string& str ) { + void GetHostsConnected( string* hosts, string* addrs ) { if ( IsValid() ) { - m_cref->_FormatSockets( str ); - return str.c_str(); - } else { - return ""; + m_cref->_FormatHostInfo( hosts, addrs ); } } + bool IsValid() { return m_isValid; } private: - bool IsValid() { return m_cref != NULL; } CookieRef* m_cref; CRefMgr* m_mgr; + bool m_isValid; + bool m_locked; }; class CookieMapIterator { public: - CookieMapIterator(); + CookieMapIterator(pthread_rwlock_t* rwlock); ~CookieMapIterator() {} CookieID Next(); private: + RWReadLock m_rwl; CookieMap::const_iterator _iter; }; diff --git a/xwords4/relay/ctrl.cpp b/xwords4/relay/ctrl.cpp index 460ebe279..d9c2d1fcc 100644 --- a/xwords4/relay/ctrl.cpp +++ b/xwords4/relay/ctrl.cpp @@ -195,10 +195,10 @@ cmd_kill_eject( int socket, const char** args ) const char* id = args[3]; if ( idhow != NULL && id != NULL ) { if ( 0 == strcmp( idhow, "name" ) ) { - CRefMgr::Get()->Delete( id ); + CRefMgr::Get()->Recycle( id ); found = true; } else if ( 0 == strcmp( idhow, "id" ) ) { - CRefMgr::Get()->Delete( atoi( id ) ); + CRefMgr::Get()->Recycle( atoi( id ) ); found = true; } } @@ -360,7 +360,7 @@ format_uptime( char* buf, int len ) seconds %= 60; snprintf( buf, len, - "uptime: %d D, %d H, %d M, %ld S", + "%d D, %d H, %d M, %ld S", days, hours, minutes, seconds ); } @@ -374,7 +374,7 @@ cmd_uptime( int socket, const char** args ) } else { char buf[128]; format_uptime( buf, sizeof(buf) ); - print_to_sock( socket, true, "%s", buf ); + print_to_sock( socket, true, "uptime: %s", buf ); } return false; } diff --git a/xwords4/relay/http.cpp b/xwords4/relay/http.cpp index 7c546b5e2..cbb6f702b 100644 --- a/xwords4/relay/http.cpp +++ b/xwords4/relay/http.cpp @@ -120,6 +120,7 @@ printCrefs( FILE* fil ) "Players Here" "State" "Host IDs" + "Host IPs" ); fprintf( fil, "\n" ); @@ -127,22 +128,28 @@ printCrefs( FILE* fil ) CookieMapIterator iter = cmgr->GetCookieIterator(); CookieID id; for ( id = iter.Next(); id != 0; id = iter.Next() ) { - string tmp; - SafeCref scr( id ); - fprintf( fil, "" - "%s" /* name */ - "%s" /* conn name */ - "%d" /* cookie id */ - "%d" /* total sent */ - "%d" /* players */ - "%d" /* players here */ - "%s" /* State */ - "%s" /* Hosts */ - "", - scr.Cookie(), scr.ConnName(), scr.GetCookieID(), - scr.GetTotalSent(), scr.GetPlayersTotal(),scr.GetPlayersHere(), - scr.StateString(), scr.GetHostsConnected(tmp) - ); + string hosts, addrs; + SafeCref scr( id, true ); + if ( scr.IsValid() ) { + scr.GetHostsConnected( &hosts, &addrs ); + + fprintf( fil, "" + "%s" /* name */ + "%s" /* conn name */ + "%d" /* cookie id */ + "%d" /* total sent */ + "%d" /* players */ + "%d" /* players here */ + "%s" /* State */ + "%s" /* Hosts */ + "%s" /* Ip addrs */ + "", + scr.Cookie(), scr.ConnName(), scr.GetCookieID(), + scr.GetTotalSent(), scr.GetPlayersTotal(), + scr.GetPlayersHere(), scr.StateString(), + hosts.c_str(), addrs.c_str() + ); + } } fprintf( fil, "\n" ); } diff --git a/xwords4/relay/mlock.h b/xwords4/relay/mlock.h index 6813a4522..79c78824e 100644 --- a/xwords4/relay/mlock.h +++ b/xwords4/relay/mlock.h @@ -25,7 +25,6 @@ #include "xwrelay_priv.h" #include "cref.h" -#include "crefmgr.h" class MutexLock { public: @@ -52,9 +51,9 @@ class MutexLock { class SocketWriteLock { public: - SocketWriteLock( int socket ) + SocketWriteLock( int socket, pthread_mutex_t* mutex ) : m_socket( socket ) - , m_mutex( CRefMgr::Get()->GetWriteMutexForSocket( socket ) ) + , m_mutex( mutex ) { #ifdef DEBUG_LOCKS logf( XW_LOGINFO, "tlm %p for socket %d", m_mutex, socket );