Commit 0a63c8db authored by Phoebe Buckheister's avatar Phoebe Buckheister 🦎

client: don't lock same directory twice during rename

if a file is renamed withing one directory, do not lock the same
directory entry info twice (once as source, once as target). if we lock
the same entry info twice, a concurrent revalidate (eg triggered by
stat) can get into this situation:

   rename                       stat

   eiRLock(from)
                                eiRLock(from.parent)
                                eiWLock(from) ** BLOCKS
   eiRLock(to (==from))

at which point both threads block and the directory is locked forever.

(cherry picked from commit 1b0cafaf367547721007392690e07392935f0ac4)
parent 2c4d0d3f
......@@ -1842,7 +1842,8 @@ int FhgfsOps_rename(struct inode* inodeDirFrom, struct dentry* dentryFrom,
"From: %s; To: %s", oldName, newName);
FhgfsInode_entryInfoReadLock(fhgfsFromDirInode); // LOCK EntryInfo
FhgfsInode_entryInfoReadLock(fhgfsToDirInode); // LOCK EntryInfo
if (fhgfsFromDirInode != fhgfsToDirInode)
FhgfsInode_entryInfoReadLock(fhgfsToDirInode); // LOCK EntryInfo
// note the fileInode also needs to be locked to prevent reference/release during a rename
FhgfsInode_entryInfoWriteLock(fhgfsFromEntryInode); // LOCK EntryInfo (renamed file dir)
......@@ -1885,7 +1886,8 @@ int FhgfsOps_rename(struct inode* inodeDirFrom, struct dentry* dentryFrom,
}
FhgfsInode_entryInfoWriteUnlock(fhgfsFromEntryInode); // UNLOCK EntryInfo (renamed file dir)
FhgfsInode_entryInfoReadUnlock(fhgfsToDirInode); // UNLOCK ToDirEntryInfo
if (fhgfsFromDirInode != fhgfsToDirInode)
FhgfsInode_entryInfoReadUnlock(fhgfsToDirInode); // UNLOCK ToDirEntryInfo
FhgfsInode_entryInfoReadUnlock(fhgfsFromDirInode); // UNLOCK FromDirEntryInfo
LOG_DEBUG_FORMATTED(log, Log_DEBUG, logContext, "remoting complete. result: %d", (int)renameRes);
......
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
void* t0(void* p) {
for (;;) {
rename("dir/1", "dir/2");
rename("dir/2", "dir/1");
}
}
void* t1(void* p) {
for (;;) {
struct stat s;
stat("dir", &s);
}
}
int main() {
pthread_t t0h, t1h;
mkdir("dir", 0755);
close(open("dir/1", O_RDWR | O_CREAT | O_TRUNC, 0666));
pthread_create(&t0h, 0, t0, 0);
pthread_create(&t1h, 0, t1, 0);
pthread_join(t0h, 0);
pthread_join(t1h, 0);
}
# trigger the race between rename locking the same entry info twice for read and stat locking
# the same entry info for write.
# the race is usually reproduced within minutes if tuneDirSubentryCacheValidityMS=0 on the client.
# IMPORTANT: this is a stress test. ibe it failed (ie the race is triggered), the system is broken.
mount = ARGV[0]
duration = ARGV[1]
raise "need a mountpoint" unless mount
raise "need a duration" unless duration
test = caller_locations(0, 1)[0].path.gsub /.pq$/, ".c"
on node do; cd mount do; tempdir do
upload local: test
shell "gcc -std=c99 -pthread #{test.split("/").last}"
shell "! timeout -s9 #{duration} ./a.out"
end; end; end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment