cleanup; fix possible socket drop

Haven't seen it happen, but I think there was a bug that could have led
to all the sockets coming back as ready from poll() being dropped. Fixed
that and added/cleaned up some logging.
This commit is contained in:
Eric House 2017-12-04 20:40:44 -08:00
parent d5c4ecabce
commit 70dea02efc
2 changed files with 26 additions and 14 deletions

View file

@ -400,35 +400,40 @@ XWThreadPool::real_listener()
curfd = 1;
int ii;
for ( ii = 0; ii < nSockets && nEvents > 0; ++ii ) {
for ( ii = 0; ii < nSockets && nEvents > 0; ++ii, ++curfd ) {
if ( fds[curfd].revents != 0 ) {
// int socket = fds[curfd].fd;
SockInfo* sinfo = &sinfos[curfd];
const AddrInfo* addr = &sinfo->m_addr;
assert( fds[curfd].fd == addr->getSocket() );
int sock = addr->getSocket();
assert( fds[curfd].fd == sock );
if ( !SocketFound( addr ) ) {
logf( XW_LOGINFO, "%s(): dropping socket %d: not found",
__func__, addr->getSocket() );
/* no further processing if it's been removed while
we've been sleeping in poll */
we've been sleeping in poll. BUT: shouldn't curfd
be incremented?? */
--nEvents;
continue;
}
if ( 0 != (fds[curfd].revents & (POLLIN | POLLPRI)) ) {
if ( !UdpQueue::get()->handle( addr, sinfo->m_proc ) ) {
// This is likely wrong!!! return of 0 means
// remote closed, not error.
RemoveSocket( addr );
EnqueueKill( addr, "bad packet" );
EnqueueKill( addr, "got EOF" );
}
} else {
logf( XW_LOGERROR, "odd revents: %x",
fds[curfd].revents );
logf( XW_LOGERROR, "%s(): odd revents: %x; bad socket %d",
__func__, fds[curfd].revents, sock );
RemoveSocket( addr );
EnqueueKill( addr, "error/hup in poll()" );
EnqueueKill( addr, "error/hup in poll()" );
}
--nEvents;
}
++curfd;
}
assert( nEvents == 0 );
}

View file

@ -1303,14 +1303,16 @@ handleMsgsMsg( const AddrInfo* addr, bool sendFull,
const uint8_t* bufp, const uint8_t* end )
{
unsigned short nameCount;
int ii;
if ( getNetShort( &bufp, end, &nameCount ) ) {
DBMgr* dbmgr = DBMgr::Get();
vector<uint8_t> out(4); /* space for len and n_msgs */
assert( out.size() == 4 );
vector<int> msgIDs;
for ( ii = 0; ii < nameCount && bufp < end; ++ii ) {
for ( int ii = 0; ii < nameCount; ++ii ) {
if ( bufp >= end ) {
logf( XW_LOGERROR, "%s(): ran off the end", __func__ );
break;
}
// See NetUtils.java for reply format
// message-length: 2
// nameCount: 2
@ -1344,9 +1346,14 @@ handleMsgsMsg( const AddrInfo* addr, bool sendFull,
memcpy( &out[0], &tmp, sizeof(tmp) );
tmp = htons( nameCount );
memcpy( &out[2], &tmp, sizeof(tmp) );
ssize_t nwritten = write( addr->getSocket(), &out[0], out.size() );
logf( XW_LOGVERBOSE0, "%s: wrote %d bytes", __func__, nwritten );
if ( sendFull && nwritten >= 0 && (size_t)nwritten == out.size() ) {
int sock = addr->getSocket();
ssize_t nWritten = write( sock, &out[0], out.size() );
if ( nWritten < 0 ) {
logf( XW_LOGERROR, "%s(): write to socket %d failed: %d/%s", __func__,
sock, errno, strerror(errno) );
} else if ( sendFull && (size_t)nWritten == out.size() ) {
logf( XW_LOGVERBOSE0, "%s(): wrote %d bytes to socket %d", __func__,
nWritten, sock );
dbmgr->RecordSent( &msgIDs[0], msgIDs.size() );
// This is wrong: should be removed when ACK returns and not
// before. But for some reason if I make that change apps wind up