mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2025-01-22 07:28:16 +01:00
exit threads when BT off; add util to report deadlocks
I've seen a deadlock in the BT stuff. Now they'll be caught and reported via Crashlytics on DEBUG builds.
This commit is contained in:
parent
e23129c0e7
commit
1505a443ac
2 changed files with 193 additions and 83 deletions
|
@ -248,6 +248,8 @@ public class BTService extends XWJIService {
|
|||
if ( BTEnabled() ) {
|
||||
enqueueWork( context, BTService.class, sJobID, intent );
|
||||
// Log.d( TAG, "enqueueWork(%s)", cmdFrom( intent, BTAction.values() ) );
|
||||
} else {
|
||||
Log.d( TAG, "enqueueWork(): BT disabled so doing nothing" );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -498,6 +500,8 @@ public class BTService extends XWJIService {
|
|||
default:
|
||||
Assert.fail();
|
||||
}
|
||||
} else {
|
||||
Log.d( TAG, "onHandleWorkImpl(): BT disabled so doing nothing" );
|
||||
}
|
||||
} // onHandleWorkImpl()
|
||||
|
||||
|
@ -525,7 +529,6 @@ public class BTService extends XWJIService {
|
|||
|
||||
static void startYourself()
|
||||
{
|
||||
Log.d( TAG, "startYourself()" );
|
||||
synchronized ( s_listener ) {
|
||||
if ( s_listener[0] == null ) {
|
||||
s_listener[0] = new BTListenerThread();
|
||||
|
@ -732,7 +735,7 @@ public class BTService extends XWJIService {
|
|||
final String className = getClass().getSimpleName();
|
||||
final AtomicInteger nDone = new AtomicInteger();
|
||||
Log.d( TAG, "%s.run() starting", className );
|
||||
while ( !mFinishing ) {
|
||||
while ( !mFinishing && BTEnabled() ) {
|
||||
try {
|
||||
List<PacketAccumulator> pas = getHasData(); // blocks
|
||||
Thread[] threads = new Thread[pas.size()];
|
||||
|
@ -1096,18 +1099,20 @@ public class BTService extends XWJIService {
|
|||
{
|
||||
long waitFromNow;
|
||||
// Log.d( TAG, "getNextReadyMS() IN" );
|
||||
synchronized ( this ) {
|
||||
if ( 0 == mCmds.size() ) { // nothing to send
|
||||
waitFromNow = Long.MAX_VALUE;
|
||||
} else if ( 0 == mFailCount ) {
|
||||
waitFromNow = 0;
|
||||
} else {
|
||||
// If we're failing, use a backoff.
|
||||
long wait = 10 * 1000 * 2 * (1 + mFailCount);
|
||||
waitFromNow = wait - (System.currentTimeMillis() - mLastFailTime);
|
||||
}
|
||||
try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( this ) ) {
|
||||
synchronized ( this ) {
|
||||
if ( 0 == mCmds.size() ) { // nothing to send
|
||||
waitFromNow = Long.MAX_VALUE;
|
||||
} else if ( 0 == mFailCount ) {
|
||||
waitFromNow = 0;
|
||||
} else {
|
||||
// If we're failing, use a backoff.
|
||||
long wait = 10 * 1000 * 2 * (1 + mFailCount);
|
||||
waitFromNow = wait - (System.currentTimeMillis() - mLastFailTime);
|
||||
}
|
||||
|
||||
Log.d( TAG, "%s.getNextReadyMS() => %dms", this, waitFromNow );
|
||||
Log.d( TAG, "%s.getNextReadyMS() => %dms", this, waitFromNow );
|
||||
}
|
||||
}
|
||||
return waitFromNow;
|
||||
}
|
||||
|
@ -1115,9 +1120,11 @@ public class BTService extends XWJIService {
|
|||
void setNoHost()
|
||||
{
|
||||
// Log.d( TAG, "setNoHost() IN" );
|
||||
synchronized ( this ) {
|
||||
mLastFailTime = System.currentTimeMillis();
|
||||
++mFailCount;
|
||||
try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( this ) ) {
|
||||
synchronized ( this ) {
|
||||
mLastFailTime = System.currentTimeMillis();
|
||||
++mFailCount;
|
||||
}
|
||||
}
|
||||
// Log.d( TAG, "setNoHost() OUT" );
|
||||
}
|
||||
|
@ -1140,47 +1147,49 @@ public class BTService extends XWJIService {
|
|||
List<BTCmd> localCmds = null;
|
||||
List<Integer> localGameIDs = null;
|
||||
|
||||
synchronized ( this ) {
|
||||
byte[] data = mOP.bos.toByteArray();
|
||||
if ( 0 < data.length ) {
|
||||
try {
|
||||
// Format is <proto><len-of-rest><msgCount><msgsData> To
|
||||
// insert the count at the beginning we have to create a
|
||||
// whole new byte array since there's no random access.
|
||||
try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( this ) ) {
|
||||
synchronized ( this ) {
|
||||
byte[] data = mOP.bos.toByteArray();
|
||||
if ( 0 < data.length ) {
|
||||
try {
|
||||
// Format is <proto><len-of-rest><msgCount><msgsData> To
|
||||
// insert the count at the beginning we have to create a
|
||||
// whole new byte array since there's no random access.
|
||||
|
||||
OutputPair tmpOP = new OutputPair();
|
||||
tmpOP.dos.writeByte( mCmds.size() ); // count of messages
|
||||
Log.d( TAG, "writeAndCheck(): wrote msg count: %d", mCmds.size() );
|
||||
tmpOP.dos.write( data, 0, data.length );
|
||||
data = tmpOP.bos.toByteArray(); // replace data
|
||||
OutputPair tmpOP = new OutputPair();
|
||||
tmpOP.dos.writeByte( mCmds.size() ); // count of messages
|
||||
Log.d( TAG, "writeAndCheck(): wrote msg count: %d", mCmds.size() );
|
||||
tmpOP.dos.write( data, 0, data.length );
|
||||
data = tmpOP.bos.toByteArray(); // replace data
|
||||
|
||||
// now write to the socket
|
||||
Assert.assertNotNull( dos );
|
||||
// dos.writeByte( BT_PROTO );
|
||||
dos.writeShort( data.length );
|
||||
dos.write( data, 0, data.length );
|
||||
dos.flush();
|
||||
Log.d( TAG, "writeAndCheck(): wrote %d-byte payload with sum %s",
|
||||
data.length, Utils.getMD5SumFor( data ) );
|
||||
// now write to the socket
|
||||
Assert.assertNotNull( dos );
|
||||
// dos.writeByte( BT_PROTO );
|
||||
dos.writeShort( data.length );
|
||||
dos.write( data, 0, data.length );
|
||||
dos.flush();
|
||||
Log.d( TAG, "writeAndCheck(): wrote %d-byte payload with sum %s",
|
||||
data.length, Utils.getMD5SumFor( data ) );
|
||||
|
||||
// If we get this far, we're going to assume the send has
|
||||
// succeeded. Now we need to get out of the synchronized
|
||||
// block because handling all the reads on the socket can
|
||||
// take a long time and will block lots of stuff
|
||||
// If we get this far, we're going to assume the send has
|
||||
// succeeded. Now we need to get out of the synchronized
|
||||
// block because handling all the reads on the socket can
|
||||
// take a long time and will block lots of stuff
|
||||
|
||||
localCmds = mCmds;
|
||||
localGameIDs = mGameIDs;
|
||||
localCmds = mCmds;
|
||||
localGameIDs = mGameIDs;
|
||||
|
||||
// Now read responses
|
||||
// int nCmds = mCmds.size();
|
||||
// On remote, socket is being passed through
|
||||
// onHandleWork. Could take a f*ck of a long time. Maybe we
|
||||
// don't want this any more?
|
||||
// try ( KillerIn killer = new KillerIn( socket, 5 * nCmds ) ) {
|
||||
// DataInputStream inStream =
|
||||
// new DataInputStream( socket.getInputStream() );
|
||||
} catch ( IOException ioe ) {
|
||||
Log.e( TAG, "writeAndCheck(): ioe: %s", ioe.getMessage() );
|
||||
// Now read responses
|
||||
// int nCmds = mCmds.size();
|
||||
// On remote, socket is being passed through
|
||||
// onHandleWork. Could take a f*ck of a long time. Maybe we
|
||||
// don't want this any more?
|
||||
// try ( KillerIn killer = new KillerIn( socket, 5 * nCmds ) ) {
|
||||
// DataInputStream inStream =
|
||||
// new DataInputStream( socket.getInputStream() );
|
||||
} catch ( IOException ioe ) {
|
||||
Log.e( TAG, "writeAndCheck(): ioe: %s", ioe.getMessage() );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1328,30 +1337,28 @@ public class BTService extends XWJIService {
|
|||
|
||||
private void append( BTCmd cmd, OutputPair op ) throws IOException
|
||||
{
|
||||
// Log.d( TAG, "append() IN" );
|
||||
synchronized ( this ) {
|
||||
append( cmd, 0, op );
|
||||
}
|
||||
// Log.d( TAG, "append() OUT" );
|
||||
append( cmd, 0, op );
|
||||
}
|
||||
|
||||
private void append( BTCmd cmd, int gameID, OutputPair op ) throws IOException
|
||||
{
|
||||
// Log.d( TAG, "append() IN" );
|
||||
synchronized ( this ) {
|
||||
if ( 0 == mCmds.size() ) {
|
||||
mStamp = System.currentTimeMillis();
|
||||
try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( this ) ) {
|
||||
synchronized ( this ) {
|
||||
if ( 0 == mCmds.size() ) {
|
||||
mStamp = System.currentTimeMillis();
|
||||
}
|
||||
mCmds.add( cmd );
|
||||
mGameIDs.add( gameID );
|
||||
mOP.dos.writeByte( cmd.ordinal() );
|
||||
byte[] data = op.bos.toByteArray();
|
||||
mOP.dos.writeShort( data.length );
|
||||
mOP.dos.write( data, 0, data.length );
|
||||
|
||||
mFailCount = 0; // for now, we restart timer on new data
|
||||
tellSomebody();
|
||||
|
||||
}
|
||||
mCmds.add( cmd );
|
||||
mGameIDs.add( gameID );
|
||||
mOP.dos.writeByte( cmd.ordinal() );
|
||||
byte[] data = op.bos.toByteArray();
|
||||
mOP.dos.writeShort( data.length );
|
||||
mOP.dos.write( data, 0, data.length );
|
||||
|
||||
mFailCount = 0; // for now, we restart timer on new data
|
||||
tellSomebody();
|
||||
|
||||
}
|
||||
// Log.d( TAG, "append(%s): now %s", cmd, this );
|
||||
}
|
||||
|
@ -1359,8 +1366,10 @@ public class BTService extends XWJIService {
|
|||
void resetBackoff()
|
||||
{
|
||||
// Log.d( TAG, "resetBackoff() IN" );
|
||||
synchronized ( this ) {
|
||||
mFailCount = 0;
|
||||
try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( this ) ) {
|
||||
synchronized ( this ) {
|
||||
mFailCount = 0;
|
||||
}
|
||||
}
|
||||
// Log.d( TAG, "resetBackoff() OUT" );
|
||||
}
|
||||
|
@ -1379,8 +1388,10 @@ public class BTService extends XWJIService {
|
|||
private void tellSomebody()
|
||||
{
|
||||
// Log.d( TAG, "tellSomebody() IN" );
|
||||
synchronized ( sBlocker ) {
|
||||
sBlocker.notifyAll();
|
||||
try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( sBlocker ) ) {
|
||||
synchronized ( sBlocker ) {
|
||||
sBlocker.notifyAll();
|
||||
}
|
||||
}
|
||||
// Log.d( TAG, "tellSomebody() OUT" );
|
||||
}
|
||||
|
@ -1483,14 +1494,16 @@ public class BTService extends XWJIService {
|
|||
List<PacketAccumulator> result = new ArrayList<>();
|
||||
while ( 0 == result.size() ) {
|
||||
long newMin = 5 * 60 * 1000;
|
||||
synchronized ( sSenders ) {
|
||||
for ( String addr : sSenders.keySet() ) {
|
||||
PacketAccumulator pa = sSenders.get( addr );
|
||||
long nextReady = pa.getNextReadyMS();
|
||||
if ( nextReady <= 0 ) {
|
||||
result.add( pa );
|
||||
} else {
|
||||
newMin = Math.min( newMin, nextReady );
|
||||
try ( DbgUtils.DeadlockWatch dw = new DbgUtils.DeadlockWatch( sSenders ) ) {
|
||||
synchronized ( sSenders ) {
|
||||
for ( String addr : sSenders.keySet() ) {
|
||||
PacketAccumulator pa = sSenders.get( addr );
|
||||
long nextReady = pa.getNextReadyMS();
|
||||
if ( nextReady <= 0 ) {
|
||||
result.add( pa );
|
||||
} else {
|
||||
newMin = Math.min( newMin, nextReady );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,7 +31,6 @@ import android.text.format.Time;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Formatter;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.eehouse.android.xw4.loc.LocUtils;
|
||||
|
||||
|
@ -168,4 +167,102 @@ public class DbgUtils {
|
|||
// return dump.toString();
|
||||
// }
|
||||
|
||||
private static List<DeadlockWatch> sLockHolders = new ArrayList<>();
|
||||
|
||||
public static class DeadlockWatch extends Thread implements AutoCloseable {
|
||||
private static final long DEFAULT_SLEEP_MS = 10 * 1000;
|
||||
final private Object mOwner;
|
||||
private long mStartStamp;
|
||||
// private long mGotItTime = 0;
|
||||
private boolean mCloseFired = false;
|
||||
private String mStartStack;
|
||||
|
||||
// There's a race between this constructor and the synchronized()
|
||||
// block that follows its try-with-resources. Oh well.
|
||||
DeadlockWatch( Object syncObj )
|
||||
{
|
||||
mOwner = BuildConfig.DEBUG ? syncObj : null;
|
||||
if ( BuildConfig.DEBUG ) {
|
||||
mStartStack = android.util.Log.getStackTraceString(new Exception());
|
||||
// Log.d( TAG, "__init(owner=%d): %s", mOwner.hashCode(), mStartStack );
|
||||
mStartStamp = System.currentTimeMillis();
|
||||
synchronized ( sLockHolders ) {
|
||||
sLockHolders.add( this );
|
||||
// Log.d( TAG, "added for owner %d", mOwner.hashCode() );
|
||||
}
|
||||
start();
|
||||
}
|
||||
}
|
||||
|
||||
// public void gotIt( Object obj )
|
||||
// {
|
||||
// if ( BuildConfig.DEBUG ) {
|
||||
// Assert.assertTrue( obj == mOwner );
|
||||
// mGotItTime = System.currentTimeMillis();
|
||||
// // Log.d( TAG, "%s got lock after %dms", obj, mGotItTime - mStartStamp );
|
||||
// }
|
||||
// }
|
||||
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
if ( BuildConfig.DEBUG ) {
|
||||
mCloseFired = true;
|
||||
// Assert.assertTrue( 0 < mGotItTime ); // did you forget to call gotIt? :-)
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run()
|
||||
{
|
||||
if ( BuildConfig.DEBUG ) {
|
||||
long sleepMS = DEFAULT_SLEEP_MS;
|
||||
try {
|
||||
Thread.sleep( sleepMS );
|
||||
|
||||
if ( !mCloseFired ) {
|
||||
DeadlockWatch likelyCulprit = null;
|
||||
synchronized ( sLockHolders ) {
|
||||
for ( DeadlockWatch sc : sLockHolders ) {
|
||||
if ( sc.mOwner == mOwner && sc != this ) {
|
||||
likelyCulprit = sc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String msg = new StringBuilder()
|
||||
.append("timer fired!!!!")
|
||||
.append( "lock sought by: " )
|
||||
.append( mStartStack )
|
||||
.append( "lock likely held by: " )
|
||||
.append( likelyCulprit.mStartStack )
|
||||
.toString();
|
||||
CrashTrack.logAndSend( TAG, msg );
|
||||
}
|
||||
|
||||
removeSelf();
|
||||
} catch ( InterruptedException ie ) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void removeSelf()
|
||||
{
|
||||
if ( BuildConfig.DEBUG ) {
|
||||
synchronized ( sLockHolders ) {
|
||||
int start = sLockHolders.size();
|
||||
// Log.d( TAG, "removing for owner %d", mOwner.hashCode() );
|
||||
sLockHolders.remove( this );
|
||||
Assert.assertTrue( start - 1 == sLockHolders.size() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return super.toString() + "; startStack: " + mStartStack;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue