diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 68ddfb0..37b14ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -3162,14 +3162,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats, penultimateBlockMinReplication = blockManager.checkMinReplication(penultimateBlock); } - assert penultimateBlockState == BlockUCState.COMPLETE || - penultimateBlockState == BlockUCState.COMMITTED : - "Unexpected state of penultimate block in " + src; + + boolean expected=(penultimateBlockState == BlockUCState.COMPLETE || + penultimateBlockState == BlockUCState.COMMITTED); + + String unexpected_msg = "Unexpected state of penultimate block in " + src; + assert expected: unexpected_msg; + if (!expected) + LOG.error(unexpected_msg); switch(lastBlockState) { case COMPLETE: - assert false : "Already checked that the last block is incomplete"; - break; + String assert_msg = "Already checked that the last block is incomplete in " + src; + assert false : assert_msg; + throw new IOException(assert_msg); case COMMITTED: // Close file if committed blocks are minimally replicated if(penultimateBlockMinReplication && diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java index d49311f..083d4e6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java @@ -65,12 +65,17 @@ import static org.apache.hadoop.util.Time.now; */ @InterfaceAudience.Private public class LeaseManager { + + private static final String NAMENODE_LEASEMANAGER_PATHMAX_KEY = "dfs.namenode.leasemanager.pathmax"; + private static final long NAMENODE_LEASEMANAGER_PATHMAX_DEFAULT = 10000; + public static final Log LOG = LogFactory.getLog(LeaseManager.class); private final FSNamesystem fsnamesystem; private long softLimit = HdfsConstants.LEASE_SOFTLIMIT_PERIOD; private long hardLimit = HdfsConstants.LEASE_HARDLIMIT_PERIOD; + private final long release_path_max; // // Used for handling lock-leases @@ -89,7 +94,12 @@ public class LeaseManager { private Daemon lmthread; private volatile boolean shouldRunMonitor; - LeaseManager(FSNamesystem fsnamesystem) {this.fsnamesystem = fsnamesystem;} + LeaseManager(FSNamesystem fsnamesystem) { + this.fsnamesystem = fsnamesystem; + org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration(); + this.release_path_max = conf.getLong(NAMENODE_LEASEMANAGER_PATHMAX_KEY, + NAMENODE_LEASEMANAGER_PATHMAX_DEFAULT); + } Lease getLease(String holder) { return leases.get(holder); @@ -432,20 +442,24 @@ public class LeaseManager { private synchronized boolean checkLeases() { boolean needSync = false; assert fsnamesystem.hasWriteLock(); - for(; sortedLeases.size() > 0; ) { + long iters = 0; + for(int nleases = sortedLeases.size(); + nleases > 0; + nleases = sortedLeases.size()) { final Lease oldest = sortedLeases.first(); if (!oldest.expiredHardLimit()) { return needSync; } - LOG.info(oldest + " has expired hard limit"); + final int npaths = oldest.getPaths().size(); + LOG.warn(oldest + " has expired hard limit with " + npaths + "paths"); final List removing = new ArrayList(); // need to create a copy of the oldest lease paths, becuase // internalReleaseLease() removes paths corresponding to empty files, // i.e. it needs to modify the collection being iterated over // causing ConcurrentModificationException - String[] leasePaths = new String[oldest.getPaths().size()]; + String[] leasePaths = new String[npaths]; oldest.getPaths().toArray(leasePaths); for(String p : leasePaths) { try { @@ -472,6 +486,28 @@ public class LeaseManager { for(String p : removing) { removeLease(oldest, p); } + + /* Only take into account the batch work if we did some real + * work. This is just a hack for now, so that the NN keeps + * getting stuck in case it hits the "unremovable" lease + * problem */ + if (sortedLeases.size() < nleases) { + iters += npaths; + } else { + LOG.error("checkLeases loop invariant violated: we did not release any lease !", + new Throwable("infinite loop")); + } + + /* We don't want to be trapped here endlessly looping, + * because we're holding a lock. Get away as needed after a + * few iterations */ + if (iters >= release_path_max) { + LOG.warn("Breaking out of checkLeases() after " + iters + " iterations", + new Throwable("long loop")); + break; + } + + } return needSync; }