exit threads when BT off; add util to report deadlocks

I've seen a deadlock in the BT stuff. Now they'll be caught and reported
via Crashlytics on DEBUG builds.
This commit is contained in:
Eric House 2019-02-17 12:49:17 -08:00
parent e23129c0e7
commit 1505a443ac
2 changed files with 193 additions and 83 deletions

View file

@ -248,6 +248,8 @@ public class BTService extends XWJIService {
if ( BTEnabled() ) { if ( BTEnabled() ) {
enqueueWork( context, BTService.class, sJobID, intent ); enqueueWork( context, BTService.class, sJobID, intent );
// Log.d( TAG, "enqueueWork(%s)", cmdFrom( intent, BTAction.values() ) ); // Log.d( TAG, "enqueueWork(%s)", cmdFrom( intent, BTAction.values() ) );
} else {
Log.d( TAG, "enqueueWork(): BT disabled so doing nothing" );
} }
} }
@ -498,6 +500,8 @@ public class BTService extends XWJIService {
default: default:
Assert.fail(); Assert.fail();
} }
} else {
Log.d( TAG, "onHandleWorkImpl(): BT disabled so doing nothing" );
} }
} // onHandleWorkImpl() } // onHandleWorkImpl()
@ -525,7 +529,6 @@ public class BTService extends XWJIService {
static void startYourself() static void startYourself()
{ {
Log.d( TAG, "startYourself()" );
synchronized ( s_listener ) { synchronized ( s_listener ) {
if ( s_listener[0] == null ) { if ( s_listener[0] == null ) {
s_listener[0] = new BTListenerThread(); s_listener[0] = new BTListenerThread();
@ -732,7 +735,7 @@ public class BTService extends XWJIService {
final String className = getClass().getSimpleName(); final String className = getClass().getSimpleName();
final AtomicInteger nDone = new AtomicInteger(); final AtomicInteger nDone = new AtomicInteger();
Log.d( TAG, "%s.run() starting", className ); Log.d( TAG, "%s.run() starting", className );
while ( !mFinishing ) { while ( !mFinishing && BTEnabled() ) {
try { try {
List<PacketAccumulator> pas = getHasData(); // blocks List<PacketAccumulator> pas = getHasData(); // blocks
Thread[] threads = new Thread[pas.size()]; Thread[] threads = new Thread[pas.size()];
@ -1096,18 +1099,20 @@ public class BTService extends XWJIService {
{ {
long waitFromNow; long waitFromNow;
// Log.d( TAG, "getNextReadyMS() IN" ); // Log.d( TAG, "getNextReadyMS() IN" );
synchronized ( this ) { try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( this ) ) {
if ( 0 == mCmds.size() ) { // nothing to send synchronized ( this ) {
waitFromNow = Long.MAX_VALUE; if ( 0 == mCmds.size() ) { // nothing to send
} else if ( 0 == mFailCount ) { waitFromNow = Long.MAX_VALUE;
waitFromNow = 0; } else if ( 0 == mFailCount ) {
} else { waitFromNow = 0;
// If we're failing, use a backoff. } else {
long wait = 10 * 1000 * 2 * (1 + mFailCount); // If we're failing, use a backoff.
waitFromNow = wait - (System.currentTimeMillis() - mLastFailTime); long wait = 10 * 1000 * 2 * (1 + mFailCount);
} waitFromNow = wait - (System.currentTimeMillis() - mLastFailTime);
}
Log.d( TAG, "%s.getNextReadyMS() => %dms", this, waitFromNow ); Log.d( TAG, "%s.getNextReadyMS() => %dms", this, waitFromNow );
}
} }
return waitFromNow; return waitFromNow;
} }
@ -1115,9 +1120,11 @@ public class BTService extends XWJIService {
void setNoHost() void setNoHost()
{ {
// Log.d( TAG, "setNoHost() IN" ); // Log.d( TAG, "setNoHost() IN" );
synchronized ( this ) { try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( this ) ) {
mLastFailTime = System.currentTimeMillis(); synchronized ( this ) {
++mFailCount; mLastFailTime = System.currentTimeMillis();
++mFailCount;
}
} }
// Log.d( TAG, "setNoHost() OUT" ); // Log.d( TAG, "setNoHost() OUT" );
} }
@ -1140,47 +1147,49 @@ public class BTService extends XWJIService {
List<BTCmd> localCmds = null; List<BTCmd> localCmds = null;
List<Integer> localGameIDs = null; List<Integer> localGameIDs = null;
synchronized ( this ) { try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( this ) ) {
byte[] data = mOP.bos.toByteArray(); synchronized ( this ) {
if ( 0 < data.length ) { byte[] data = mOP.bos.toByteArray();
try { if ( 0 < data.length ) {
// Format is <proto><len-of-rest><msgCount><msgsData> To try {
// insert the count at the beginning we have to create a // Format is <proto><len-of-rest><msgCount><msgsData> To
// whole new byte array since there's no random access. // insert the count at the beginning we have to create a
// whole new byte array since there's no random access.
OutputPair tmpOP = new OutputPair(); OutputPair tmpOP = new OutputPair();
tmpOP.dos.writeByte( mCmds.size() ); // count of messages tmpOP.dos.writeByte( mCmds.size() ); // count of messages
Log.d( TAG, "writeAndCheck(): wrote msg count: %d", mCmds.size() ); Log.d( TAG, "writeAndCheck(): wrote msg count: %d", mCmds.size() );
tmpOP.dos.write( data, 0, data.length ); tmpOP.dos.write( data, 0, data.length );
data = tmpOP.bos.toByteArray(); // replace data data = tmpOP.bos.toByteArray(); // replace data
// now write to the socket // now write to the socket
Assert.assertNotNull( dos ); Assert.assertNotNull( dos );
// dos.writeByte( BT_PROTO ); // dos.writeByte( BT_PROTO );
dos.writeShort( data.length ); dos.writeShort( data.length );
dos.write( data, 0, data.length ); dos.write( data, 0, data.length );
dos.flush(); dos.flush();
Log.d( TAG, "writeAndCheck(): wrote %d-byte payload with sum %s", Log.d( TAG, "writeAndCheck(): wrote %d-byte payload with sum %s",
data.length, Utils.getMD5SumFor( data ) ); data.length, Utils.getMD5SumFor( data ) );
// If we get this far, we're going to assume the send has // If we get this far, we're going to assume the send has
// succeeded. Now we need to get out of the synchronized // succeeded. Now we need to get out of the synchronized
// block because handling all the reads on the socket can // block because handling all the reads on the socket can
// take a long time and will block lots of stuff // take a long time and will block lots of stuff
localCmds = mCmds; localCmds = mCmds;
localGameIDs = mGameIDs; localGameIDs = mGameIDs;
// Now read responses // Now read responses
// int nCmds = mCmds.size(); // int nCmds = mCmds.size();
// On remote, socket is being passed through // On remote, socket is being passed through
// onHandleWork. Could take a f*ck of a long time. Maybe we // onHandleWork. Could take a f*ck of a long time. Maybe we
// don't want this any more? // don't want this any more?
// try ( KillerIn killer = new KillerIn( socket, 5 * nCmds ) ) { // try ( KillerIn killer = new KillerIn( socket, 5 * nCmds ) ) {
// DataInputStream inStream = // DataInputStream inStream =
// new DataInputStream( socket.getInputStream() ); // new DataInputStream( socket.getInputStream() );
} catch ( IOException ioe ) { } catch ( IOException ioe ) {
Log.e( TAG, "writeAndCheck(): ioe: %s", ioe.getMessage() ); Log.e( TAG, "writeAndCheck(): ioe: %s", ioe.getMessage() );
}
} }
} }
} }
@ -1328,30 +1337,28 @@ public class BTService extends XWJIService {
private void append( BTCmd cmd, OutputPair op ) throws IOException private void append( BTCmd cmd, OutputPair op ) throws IOException
{ {
// Log.d( TAG, "append() IN" ); append( cmd, 0, op );
synchronized ( this ) {
append( cmd, 0, op );
}
// Log.d( TAG, "append() OUT" );
} }
private void append( BTCmd cmd, int gameID, OutputPair op ) throws IOException private void append( BTCmd cmd, int gameID, OutputPair op ) throws IOException
{ {
// Log.d( TAG, "append() IN" ); // Log.d( TAG, "append() IN" );
synchronized ( this ) { try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( this ) ) {
if ( 0 == mCmds.size() ) { synchronized ( this ) {
mStamp = System.currentTimeMillis(); if ( 0 == mCmds.size() ) {
mStamp = System.currentTimeMillis();
}
mCmds.add( cmd );
mGameIDs.add( gameID );
mOP.dos.writeByte( cmd.ordinal() );
byte[] data = op.bos.toByteArray();
mOP.dos.writeShort( data.length );
mOP.dos.write( data, 0, data.length );
mFailCount = 0; // for now, we restart timer on new data
tellSomebody();
} }
mCmds.add( cmd );
mGameIDs.add( gameID );
mOP.dos.writeByte( cmd.ordinal() );
byte[] data = op.bos.toByteArray();
mOP.dos.writeShort( data.length );
mOP.dos.write( data, 0, data.length );
mFailCount = 0; // for now, we restart timer on new data
tellSomebody();
} }
// Log.d( TAG, "append(%s): now %s", cmd, this ); // Log.d( TAG, "append(%s): now %s", cmd, this );
} }
@ -1359,8 +1366,10 @@ public class BTService extends XWJIService {
void resetBackoff() void resetBackoff()
{ {
// Log.d( TAG, "resetBackoff() IN" ); // Log.d( TAG, "resetBackoff() IN" );
synchronized ( this ) { try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( this ) ) {
mFailCount = 0; synchronized ( this ) {
mFailCount = 0;
}
} }
// Log.d( TAG, "resetBackoff() OUT" ); // Log.d( TAG, "resetBackoff() OUT" );
} }
@ -1379,8 +1388,10 @@ public class BTService extends XWJIService {
private void tellSomebody() private void tellSomebody()
{ {
// Log.d( TAG, "tellSomebody() IN" ); // Log.d( TAG, "tellSomebody() IN" );
synchronized ( sBlocker ) { try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( sBlocker ) ) {
sBlocker.notifyAll(); synchronized ( sBlocker ) {
sBlocker.notifyAll();
}
} }
// Log.d( TAG, "tellSomebody() OUT" ); // Log.d( TAG, "tellSomebody() OUT" );
} }
@ -1483,14 +1494,16 @@ public class BTService extends XWJIService {
List<PacketAccumulator> result = new ArrayList<>(); List<PacketAccumulator> result = new ArrayList<>();
while ( 0 == result.size() ) { while ( 0 == result.size() ) {
long newMin = 5 * 60 * 1000; long newMin = 5 * 60 * 1000;
synchronized ( sSenders ) { try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( sSenders ) ) {
for ( String addr : sSenders.keySet() ) { synchronized ( sSenders ) {
PacketAccumulator pa = sSenders.get( addr ); for ( String addr : sSenders.keySet() ) {
long nextReady = pa.getNextReadyMS(); PacketAccumulator pa = sSenders.get( addr );
if ( nextReady <= 0 ) { long nextReady = pa.getNextReadyMS();
result.add( pa ); if ( nextReady <= 0 ) {
} else { result.add( pa );
newMin = Math.min( newMin, nextReady ); } else {
newMin = Math.min( newMin, nextReady );
}
} }
} }
} }

View file

@ -31,7 +31,6 @@ import android.text.format.Time;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Formatter; import java.util.Formatter;
import java.util.List; import java.util.List;
import java.util.Set;
import org.eehouse.android.xw4.loc.LocUtils; import org.eehouse.android.xw4.loc.LocUtils;
@ -168,4 +167,102 @@ public class DbgUtils {
// return dump.toString(); // return dump.toString();
// } // }
private static List<DeadlockWatch> sLockHolders = new ArrayList<>();
public static class DeadlockWatch extends Thread implements AutoCloseable {
private static final long DEFAULT_SLEEP_MS = 10 * 1000;
final private Object mOwner;
private long mStartStamp;
// private long mGotItTime = 0;
private boolean mCloseFired = false;
private String mStartStack;
// There's a race between this constructor and the synchronized()
// block that follows its try-with-resources. Oh well.
DeadlockWatch( Object syncObj )
{
mOwner = BuildConfig.DEBUG ? syncObj : null;
if ( BuildConfig.DEBUG ) {
mStartStack = android.util.Log.getStackTraceString(new Exception());
// Log.d( TAG, "__init(owner=%d): %s", mOwner.hashCode(), mStartStack );
mStartStamp = System.currentTimeMillis();
synchronized ( sLockHolders ) {
sLockHolders.add( this );
// Log.d( TAG, "added for owner %d", mOwner.hashCode() );
}
start();
}
}
// public void gotIt( Object obj )
// {
// if ( BuildConfig.DEBUG ) {
// Assert.assertTrue( obj == mOwner );
// mGotItTime = System.currentTimeMillis();
// // Log.d( TAG, "%s got lock after %dms", obj, mGotItTime - mStartStamp );
// }
// }
@Override
public void close()
{
if ( BuildConfig.DEBUG ) {
mCloseFired = true;
// Assert.assertTrue( 0 < mGotItTime ); // did you forget to call gotIt? :-)
}
}
@Override
public void run()
{
if ( BuildConfig.DEBUG ) {
long sleepMS = DEFAULT_SLEEP_MS;
try {
Thread.sleep( sleepMS );
if ( !mCloseFired ) {
DeadlockWatch likelyCulprit = null;
synchronized ( sLockHolders ) {
for ( DeadlockWatch sc : sLockHolders ) {
if ( sc.mOwner == mOwner && sc != this ) {
likelyCulprit = sc;
break;
}
}
}
String msg = new StringBuilder()
.append("timer fired!!!!")
.append( "lock sought by: " )
.append( mStartStack )
.append( "lock likely held by: " )
.append( likelyCulprit.mStartStack )
.toString();
CrashTrack.logAndSend( TAG, msg );
}
removeSelf();
} catch ( InterruptedException ie ) {
}
}
}
private void removeSelf()
{
if ( BuildConfig.DEBUG ) {
synchronized ( sLockHolders ) {
int start = sLockHolders.size();
// Log.d( TAG, "removing for owner %d", mOwner.hashCode() );
sLockHolders.remove( this );
Assert.assertTrue( start - 1 == sLockHolders.size() );
}
}
}
@Override
public String toString()
{
return super.toString() + "; startStack: " + mStartStack;
}
}
} }