Commit 6b8e7b3a authored by Christian Mohrbacher

updated to release 6.16

parent 2933415d
......@@ -44,6 +44,9 @@ print_usage()
echo " -x Build with BEEGFS_DEBUG."
echo " -l F log to specific file"
echo " -K keep previously built files (no clean)"
echo " -o Use openssl library shipped with beegfs"
echo " -s Use sqlite3 library shipped with beegfs"
echo " -u Use cppunit library shipped with beegfs"
echo
echo "EXAMPLE:"
echo " $ `basename $0` -j 4 -p /tmp/my_beegfs_packages"
......@@ -69,9 +72,12 @@ run_cmd()
DRY_RUN=0
CLEAN_ONLY=0
CLIENT_ONLY=0
BUILD_OPENSSL=0
BUILD_CPPUNIT=0
BUILD_SQLITE=0
LOGFILE=
while getopts "hcdm:v:DCxj:p:l:K" opt; do
while getopts "hcdm:v:DCxj:p:l:Kosu" opt; do
case $opt in
h)
print_usage
......@@ -116,6 +122,15 @@ while getopts "hcdm:v:DCxj:p:l:K" opt; do
K)
DO_CLEAN=false
;;
o)
BUILD_OPENSSL=1
;;
s)
BUILD_SQLITE=1
;;
u)
BUILD_CPPUNIT=1
;;
:)
echo "Option -$OPTARG requires an argument." >&2
print_usage
......@@ -197,9 +212,21 @@ fi
# build common, opentk and thirdparty, as others depend on them
if [ $CLIENT_ONLY -eq 0 ]; then
make_dep_lib $thirdparty "" "all cppunit sqlite openssl"
make_dep_lib $common
make_dep_lib $opentk
make_dep_lib $thirdparty "" all # mongoose and ticpp
if [ ${BUILD_OPENSSL} -eq 1 ]; then
make_dep_lib $thirdparty "" openssl
fi
if [ ${BUILD_CPPUNIT} -eq 1 ]; then
make_dep_lib $thirdparty "" cppunit
fi
if [ ${BUILD_SQLITE} -eq 1 ]; then
make_dep_lib $thirdparty "" sqlite
fi
make_dep_lib $opentk
make_dep_lib $common
fi
......
......@@ -367,6 +367,8 @@ $(call define_if_matches, KERNEL_HAS_COPY_FROM_ITER, "copy_from_iter", uio.h)
$(call define_if_matches, KERNEL_HAS_INIT_WORK_2, -F "INIT_WORK(_work, _func)", workqueue.h)
$(call define_if_matches, KERNEL_HAS_ALLOC_WORKQUEUE, "alloc_workqueue", workqueue.h)
$(call define_if_matches, KERNEL_HAS_WQ_RESCUER, "WQ_RESCUER", workqueue.h)
$(call define_if_matches, KERNEL_HAS_WAIT_QUEUE_ENTRY_T, "wait_queue_entry_t", wait.h)
$(call define_if_matches, KERNEL_HAS_CURRENT_FS_TIME, "current_fs_time", fs.h)
# inodeChangeRes was changed to setattr_prepare in vanilla 4.9
$(call define_if_matches, KERNEL_HAS_SETATTR_PREPARE, "int setattr_prepare", fs.h)
......
......@@ -114,6 +114,16 @@ check_function \
KERNEL_HAS_FILE_DENTRY \
linux/fs.h
check_function \
super_setup_bdi_name "int (struct super_block *sb, char *fmt, ...)" \
KERNEL_HAS_SUPER_SETUP_BDI_NAME \
linux/fs.h
check_function \
have_submounts "int (struct dentry *parent)" \
KERNEL_HAS_HAVE_SUBMOUNTS \
linux/dcache.h
# we have to communicate with the calling makefile somehow. since we can't really use the return
# code of this script, we'll echo a special string at the end of our output for the caller to
# detect and remove again.
......
......@@ -157,6 +157,19 @@
#define currentFsGroupID current_fsgid()
#endif
// in 4.13 wait_queue_t got renamed to wait_queue_entry_t
#if defined(KERNEL_HAS_WAIT_QUEUE_ENTRY_T)
typedef wait_queue_entry_t wait_queue_t;
#endif
#if !defined(KERNEL_HAS_CURRENT_FS_TIME)
/*
 * Local replacement for current_fs_time(), compiled only when the surrounding
 * feature check found no such function in the kernel headers.
 *
 * Returns the value of current_kernel_time() passed through timespec_trunc()
 * with the time granularity (s_time_gran) of the given super block.
 */
static inline struct timespec current_fs_time(struct super_block *sb)
{
   return timespec_trunc(current_kernel_time(), sb->s_time_gran);
}
#endif
/* Defined by <linux/include/linux/uidgid.h> and already included by one of the headers, so
* no KernelFeatureDetection.mk detection required.
* Note: Not in OsCompat.h, as OsCompat depends on Common.h. */
......
......@@ -533,7 +533,7 @@ uint64_t FhgfsInode_generateInodeID(struct super_block* sb, const char* entryID,
if(unlikely(hashRes <= BEEGFS_INODE_MAXRESERVED_INO) )
hashRes = HashTk_hash(HASHTK_HALFMD4, hashBits, entryID, entryIDLen+1);
return hashRes;
return hashRes;
} break;
case INODEIDSTYLE_Hash32MD4:
......
......@@ -79,7 +79,18 @@ int FhgfsOps_revalidateIntent(struct dentry* dentry, unsigned flags)
FhgfsOpsHelper_logOp(Log_SPAM, app, dentry, inode, logContext);
if(!inode || !parentInode || is_bad_inode(inode) )
{
if(inode && S_ISDIR(inode->i_mode) )
{
if(have_submounts(dentry) )
goto cleanup_put_parent;
shrink_dcache_parent(dentry);
}
d_drop(dentry);
goto cleanup_put_parent;
}
// active dentry => remote-stat and local-compare
......@@ -121,6 +132,8 @@ int __FhgfsOps_revalidateIntent(struct dentry* parentDentry, struct dentry* dent
FhgfsInode* fhgfsInode = BEEGFS_INODE(inode);
bool cacheValid = FhgfsInode_isCacheValid(fhgfsInode, inode->i_mode, cfg);
int isValid = 0; // quasi-boolean (return value)
bool needDrop = false;
FhgfsIsizeHints iSizeHints;
......@@ -129,7 +142,10 @@ int __FhgfsOps_revalidateIntent(struct dentry* parentDentry, struct dentry* dent
if (cacheValid)
return 1;
{
isValid = 1;
return isValid;
}
if(IS_ROOT(dentry) )
fhgfsStatPtr = NULL;
......@@ -165,7 +181,10 @@ int __FhgfsOps_revalidateIntent(struct dentry* parentDentry, struct dentry* dent
FhgfsInode_entryInfoReadUnlock(parentFhgfsInode); // UNLOCK parentInfo
if (unlikely(remotingRes != FhgfsOpsErr_SUCCESS) )
return 0;
{
needDrop = true;
goto out;
}
if (outInfo.revalidateRes != FhgfsOpsErr_SUCCESS)
{
......@@ -173,7 +192,8 @@ int __FhgfsOps_revalidateIntent(struct dentry* parentDentry, struct dentry* dent
Logger_logErrFormatted(log, logContext, "Unexpected revalidate info missing: %s",
entryInfo->fileName);
return 0;
needDrop = true;
goto out;
}
// check the stat result here and set fhgfsStatPtr accordingly
......@@ -189,11 +209,21 @@ int __FhgfsOps_revalidateIntent(struct dentry* parentDentry, struct dentry* dent
entryInfo->fileName);
// now its getting difficult as there is an unexpected error
return 0;
needDrop = true;
goto out;
}
}
return !__FhgfsOps_refreshInode(app, inode, fhgfsStatPtr, &iSizeHints);
if (!__FhgfsOps_refreshInode(app, inode, fhgfsStatPtr, &iSizeHints) )
isValid = 1;
else
isValid = 0;
out:
if (needDrop)
d_drop(dentry);
return isValid;
}
/**
......
......@@ -180,7 +180,9 @@ int __FhgfsOps_constructFsInfo(struct super_block* sb, void* rawMountOptions)
*/
bdi->ra_pages = BEEGFS_DEFAULT_READAHEAD_PAGES;
#ifdef KERNEL_HAS_BDI_CAP_MAP_COPY
#if defined(KERNEL_HAS_SUPER_SETUP_BDI_NAME) && !defined(KERNEL_HAS_BDI_SETUP_AND_REGISTER)
res = super_setup_bdi_name(sb, BEEGFS_MODULE_NAME_STR);
#elif defined(KERNEL_HAS_BDI_CAP_MAP_COPY)
res = bdi_setup_and_register(bdi, BEEGFS_MODULE_NAME_STR, BDI_CAP_MAP_COPY);
#else
res = bdi_setup_and_register(bdi, BEEGFS_MODULE_NAME_STR);
......@@ -222,10 +224,15 @@ void __FhgfsOps_destructFsInfo(struct super_block* sb)
{
App* app = FhgfsOps_getApp(sb);
//call destroy iff not initialised/registered by super_setup_bdi_name
#if defined(KERNEL_HAS_SB_BDI)
#if !defined(KERNEL_HAS_SUPER_SETUP_BDI_NAME) || defined(KERNEL_HAS_BDI_SETUP_AND_REGISTER)
struct backing_dev_info* bdi = FhgfsOps_getBdi(sb);
bdi_destroy(bdi);
#endif
#endif
__FhgfsOps_uninitApp(app);
......
......@@ -45,7 +45,8 @@
#endif // memdup_user, LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)
#if defined(KERNEL_HAS_SB_BDI) && !defined(KERNEL_HAS_BDI_SETUP_AND_REGISTER)
#if defined(KERNEL_HAS_SB_BDI) && !defined(KERNEL_HAS_BDI_SETUP_AND_REGISTER) && \
!defined(KERNEL_HAS_SUPER_SETUP_BDI_NAME)
/*
* For use from filesystems to quickly init and register a bdi associated
* with dirty writeback
......@@ -350,3 +351,169 @@ int os_generic_write_checks(struct file* filp, loff_t* offset, size_t* size, int
return 0;
}
#endif
#ifndef KERNEL_HAS_HAVE_SUBMOUNTS
/**
 * enum d_walk_ret - action to take during tree walk
 * @D_WALK_CONTINUE: continue walk
 * @D_WALK_QUIT: quit walk
 * @D_WALK_NORETRY: quit when retry is needed
 * @D_WALK_SKIP: skip this dentry and its children
 *
 * Returned by the @enter() callback passed to d_walk() to steer the walk.
 */
enum d_walk_ret {
D_WALK_CONTINUE,
D_WALK_QUIT,
D_WALK_NORETRY,
D_WALK_SKIP,
};
/*
 * d_walk() callback used by have_submounts().
 *
 * @data points to an int result flag. As soon as a dentry that is a mount
 * point is seen, the flag is set to 1 and the walk is told to quit; for any
 * other dentry the walk simply continues (d_walk() itself descends into
 * non-empty d_subdirs lists).
 */
static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
{
   int *found = data;

   if (!d_mountpoint(dentry))
      return D_WALK_CONTINUE;

   *found = 1;
   return D_WALK_QUIT;
}
/**
 * d_walk - walk the dentry tree
 * @parent: start of walk
 * @data: data passed to @enter() and @finish()
 * @enter: callback when first entering the dentry
 * @finish: callback when successfully finished the walk
 *
 * The @enter() and @finish() callbacks are called with d_lock held.
 *
 * @finish may be NULL. It is only invoked when the walk ran to completion,
 * i.e. not when @enter() ended the walk early (D_WALK_QUIT, or D_WALK_SKIP on
 * @parent itself) and not when the walk is abandoned on the rename_retry path.
 *
 * NOTE(review): this appears to be a local copy of the kernel's own d_walk()
 * from fs/dcache.c for kernels that no longer provide have_submounts();
 * confirm against the targeted kernel versions before changing it.
 */
static void d_walk(struct dentry *parent, void *data,
enum d_walk_ret (*enter)(void *, struct dentry *),
void (*finish)(void *))
{
struct dentry *this_parent;
struct list_head *next;
unsigned seq = 0;
enum d_walk_ret ret;
bool retry = true;
/* (re)start of the walk; seq is 0 on the first pass and 1 on a restart.
 * presumably an odd seq makes read_seqbegin_or_lock() take rename_lock
 * exclusively instead of doing a lockless read - see seqlock documentation. */
again:
read_seqbegin_or_lock(&rename_lock, &seq);
this_parent = parent;
spin_lock(&this_parent->d_lock);
/* visit the start dentry itself before looking at any children */
ret = enter(data, this_parent);
switch (ret) {
case D_WALK_CONTINUE:
break;
case D_WALK_QUIT:
case D_WALK_SKIP:
/* skipping the start dentry leaves nothing to walk, so it ends the walk
 * just like quit does (and @finish is not called) */
goto out_unlock;
case D_WALK_NORETRY:
/* keep walking, but do not restart if a retry becomes necessary */
retry = false;
break;
}
/* start iterating the children of (the new) this_parent */
repeat:
next = this_parent->d_subdirs.next;
/* continue iterating this_parent's children at 'next' (set by the ascend code) */
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
/* advance first, so that 'continue' below moves on to the next sibling */
next = tmp->next;
/* cursor entries in the child list are not visited */
if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
continue;
/* child lock is taken while the parent's d_lock is still held */
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
ret = enter(data, dentry);
switch (ret) {
case D_WALK_CONTINUE:
break;
case D_WALK_QUIT:
/* drop the child lock here; out_unlock drops this_parent's lock */
spin_unlock(&dentry->d_lock);
goto out_unlock;
case D_WALK_NORETRY:
retry = false;
break;
case D_WALK_SKIP:
/* do not descend into this child, go on with its next sibling */
spin_unlock(&dentry->d_lock);
continue;
}
if (!list_empty(&dentry->d_subdirs)) {
/* descend: the child becomes the new this_parent. its d_lock stays held,
 * only the old parent's lock is dropped. the spin_release/spin_acquire
 * pair only touches dep_map - presumably a lockdep re-annotation of the
 * held lock from "nested" to top-level, not a real unlock/lock. */
spin_unlock(&this_parent->d_lock);
spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
this_parent = dentry;
spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
goto repeat;
}
spin_unlock(&dentry->d_lock);
}
/*
 * All done at this level ... ascend and resume the search.
 */
rcu_read_lock();
ascend:
if (this_parent != parent) {
struct dentry *child = this_parent;
this_parent = child->d_parent;
/* swap locks: release the finished child, take its parent */
spin_unlock(&child->d_lock);
spin_lock(&this_parent->d_lock);
/* might go back up the wrong parent if we have had a rename. */
if (need_seqretry(&rename_lock, seq))
goto rename_retry;
/* go into the first sibling still alive */
do {
next = child->d_child.next;
/* no sibling left at this level => ascend one more level */
if (next == &this_parent->d_subdirs)
goto ascend;
child = list_entry(next, struct dentry, d_child);
} while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
rcu_read_unlock();
goto resume;
}
/* back at the start dentry: the whole subtree has been visited */
if (need_seqretry(&rename_lock, seq))
goto rename_retry;
rcu_read_unlock();
if (finish)
finish(data);
/* common exit; this_parent->d_lock is held whenever we get here */
out_unlock:
spin_unlock(&this_parent->d_lock);
done_seqretry(&rename_lock, seq);
return;
/* a retry was requested for rename_lock: drop everything and, unless an
 * @enter() call returned D_WALK_NORETRY, restart the walk with seq = 1.
 * a retry request while seq is already odd is treated as a bug. */
rename_retry:
spin_unlock(&this_parent->d_lock);
rcu_read_unlock();
BUG_ON(seq & 1);
if (!retry)
return;
seq = 1;
goto again;
}
/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Walks @parent and the dentries below it via d_walk() with check_mount() as
 * the per-dentry callback.
 *
 * Returns 1 if @parent or one of the walked dentries is a mount point,
 * 0 otherwise.
 */
int have_submounts(struct dentry *parent)
{
   int mountFound = 0; /* set to 1 by check_mount() on the first mount point */

   d_walk(parent, &mountFound, check_mount, NULL);

   return mountFound;
}
#endif
......@@ -46,6 +46,10 @@ static inline int os_generic_permission(struct inode *inode, int mask);
extern int bdi_setup_and_register(struct backing_dev_info *bdi, char *name, unsigned int cap);
#endif
#ifndef KERNEL_HAS_HAVE_SUBMOUNTS
extern int have_submounts(struct dentry *parent);
#endif
/**
* generic_permission() compatibility function
*
......
......@@ -220,7 +220,7 @@ void Logger::logErrGranted(const char* threadName, const char* context, const ch
getTimeStr(nowTime.getTimeS(), timeStr, LOGGER_TIMESTR_SIZE);
#ifdef BEEGFS_DEBUG_PROFILING
uint64_t timeMicroS = nowTime.getTimeMicroSecPart(); // additional ms info for timestamp
uint64_t timeMicroS = nowTime.getTimeMicroSecPart();// additional microsecond info for timestamp
fprintf(errFile, "(E) %s.%06ld %s [%s] >> %s\n", timeStr, (long) timeMicroS,
threadName, context, msg);
......
......@@ -750,8 +750,12 @@ void StorageTk::checkOrCreateOrigNodeIDFile(const std::string pathStr, std::stri
throw InvalidConfigException(outStream.str() );
}
std::string currentNodeIDFileStr = currentNodeID + "\n";
std::string currentNodeIDFileStr = currentNodeID + "\n"
"# This file was auto-generated and must not be modified. If your hostname has "
"changed, create a\n# copy of this file under the name \"nodeID\", keep the content "
"unchanged and restart this service.";
ssize_t writeRes = write(fd, currentNodeIDFileStr.c_str(), currentNodeIDFileStr.length() );
IGNORE_UNUSED_VARIABLE(writeRes);
fsync(fd);
......
......@@ -5,6 +5,8 @@
#include <common/net/message/storage/creating/MkLocalFileRespMsg.h>
#include <common/net/message/storage/creating/UnlinkLocalFileMsg.h>
#include <common/net/message/storage/creating/UnlinkLocalFileRespMsg.h>
#include <common/storage/striping/StripePattern.h>
#include <common/toolkit/MathTk.h>
#include <common/toolkit/MessagingTk.h>
#include <net/msghelpers/MsgHelperMkFile.h>
#include <session/EntryLock.h>
......@@ -137,6 +139,22 @@ FhgfsOpsErr MkFileWithPatternMsgEx::mkMetaFile(DirInode& dir, MkFileDetails& mkD
// swap given preferred targets of stripe pattern with chosen targets
stripePattern->getStripeTargetIDsModifyable()->swap(stripeTargets);
// check if chunk size satisfies constraints
const unsigned chunkSize = stripePattern->getChunkSize();
if (!MathTk::isPowerOfTwo(chunkSize))
{
LOG_TOP(GENERAL, DEBUG, "Invalid chunk size: Must be a power of two.", chunkSize);
return FhgfsOpsErr_INTERNAL;
}
if (chunkSize < STRIPEPATTERN_MIN_CHUNKSIZE)
{
LOG_TOP(GENERAL, DEBUG, "Invalid chunk size: Below minimum size.",
chunkSize,
as("minChunkSize", STRIPEPATTERN_MIN_CHUNKSIZE));
return FhgfsOpsErr_INTERNAL;
}
FhgfsOpsErr makeRes = metaStore->mkNewMetaFile(dir, &mkDetails, std::move(stripePattern),
outEntryInfo, &inodeDiskData); // (note: internally deletes stripePattern)
......
......@@ -4,6 +4,8 @@
#include <common/net/message/storage/creating/MkFileMsg.h>
#include <common/net/message/storage/creating/MkFileRespMsg.h>
#include <common/storage/striping/Raid0Pattern.h>
#include <common/storage/striping/StripePattern.h>
#include <common/toolkit/MathTk.h>
#include <common/toolkit/MetadataTk.h>
#include <common/toolkit/NodesTk.h>
#include <common/toolkit/UnitTk.h>
......@@ -117,6 +119,18 @@ bool ModeCreateFile::initFileSettings(FileSettings* settings)
else
{
chunkSize = UnitTk::strHumanToInt64(iter->second);
if(!MathTk::isPowerOfTwo(chunkSize))
{
std::cerr << "Invalid value for " << MODECREATEFILE_ARG_CHUNKSIZE;
std::cerr << ": Must be a power of two." << std::endl;
return false;
}
if(chunkSize < STRIPEPATTERN_MIN_CHUNKSIZE)
{
std::cerr << "Invalid value for " << MODECREATEFILE_ARG_CHUNKSIZE;
std::cerr << ": Minimum chunk size is " << STRIPEPATTERN_MIN_CHUNKSIZE << "." << std::endl;
return false;
}
cfg->erase(iter);
}
......
......@@ -368,6 +368,10 @@ FhgfsOpsErr ModeAddMirrorBuddyGroup::doAutomaticMode()
"Please check the messages printed above."
<< std::endl;
// if used with --force, return success if retVal signals invalid values
if(retVal == FhgfsOpsErr_INVAL && cfgForce)
return FhgfsOpsErr_SUCCESS;
return retVal;
}
......
......@@ -254,6 +254,26 @@ void InternodeSyncer::updateTargetStatesAndBuddyGroups()
// Store old states for ChangeTargetConsistencyStatesMsg.
UInt8List oldConsistencyStates = targetConsistencyStates;
// before anything else is done, update the targetWasOffline flags in the resyncers. updating
// them later opens a window of opportunity where the target state store says "offline", but
// the resyncer has not noticed - which would erroneously not fail the resync.
{
UInt16ListConstIter targetID = targetIDs.begin();
UInt8ListConstIter targetState = targetReachabilityStates.begin();
while (targetID != targetIDs.end())
{
if (*targetState == TargetReachabilityState_OFFLINE)
{
BuddyResyncJob* const job = app->getBuddyResyncer()->getResyncJob(*targetID);
if (job)
job->setTargetOffline();
}
targetID++;
targetState++;
}
}
// Sync buddy groups here, because decideResync depends on it.
// This is not a problem because if pushing target states fails all targets will be
// (p)offline anyway.
......
......@@ -91,6 +91,7 @@ void BuddyResyncJob::run()
bool walkRes;
shallAbort.setZero();
targetWasOffline = false;
// delete sync candidates and gather queue; just in case there was something from a previous run
syncCandidates.clear();
......@@ -312,9 +313,17 @@ cleanup:
setStatus(BuddyResyncJobState_INTERRUPTED);
informBuddy();
}
else
if (syncErrors) // any file sync errors or success?
else if (syncErrors || targetWasOffline.read()) // any sync errors or success?
{
// we must set the buddy BAD if it has been offline during any period of time during which
// the resync was also running. we implicitly do this during resync proper, since resync
// slaves abort with errors if the target is offline. if the target goes offline *after*
// the last proper resync message has been sent and comes *back* before we try to inform
// it we will never detect that it has been offline at all. concurrently executing
// messages (eg TruncFile) may run between our opportunities to detect the offline state
// and may fail to forward their actions *even though they should forward*. this would
// lead to an inconsistent secondary. since the target has gone offline, the only
// reasonable course of action is to fail to resync entirely.
setStatus(BuddyResyncJobState_ERRORS);
informBuddy();
}
......@@ -323,8 +332,19 @@ cleanup:
setStatus(BuddyResyncJobState_SUCCESS);
// delete timestamp override file if it exists
storageTargets->rmLastBuddyCommOverride(targetID);
storageTargets->setBuddyNeedsResync(targetID, false);
// so the target went offline between the previous check "syncErrors || targetWasOffline".
// any message that has tried to forward itself in the intervening time will have seen the
// offline state, but will have been unable to set the buddy to needs-resync because it
// still *is* needs-resync. the resync itself has been perfectly successful, but we have
// to start another one anyway once the target comes back to ensure that no information
// was lost.
storageTargets->setBuddyNeedsResync(targetID, targetWasOffline.read());
informBuddy();
if (targetWasOffline.read())
LOG(WARNING,
"Resync successful, but target went offline during finalization. "
"Setting target to needs-resync again.", targetID);
}
}
......
......@@ -41,6 +41,7 @@ class BuddyResyncJob : public PThread
AtomicUInt64 numDirsMatched;
AtomicInt16 shallAbort; // quasi-boolean
AtomicInt16 targetWasOffline;
bool checkTopLevelDir(std::string& path, int64_t lastBuddyCommTimeSecs);
bool walkDirs(std::string chunksPath, std::string relPath, int level,
......@@ -69,6 +70,11 @@ class BuddyResyncJob : public PThread
return status == BuddyResyncJobState_RUNNING;
}
/**
 * Record that the target has been seen offline by setting the targetWasOffline flag to 1.
 *
 * The flag is reset at the start of run() and evaluated in the cleanup phase of the resync
 * job to decide on the final job state and the needs-resync marker.
 */
void setTargetOffline()
{
targetWasOffline.set(1);
}
private:
void setStatus(BuddyResyncJobState status)
{
......
......@@ -138,8 +138,8 @@ FhgfsOpsErr BuddyResyncerFileSyncSlave::doResync(std::string& chunkPathStr, uint
int errCode = errno;
if(errCode == ENOENT)
{ // chunk was deleted => no error
// delete the mirror chunk before resync starts
{ // chunk was deleted => no error
// delete the mirror chunk and return
bool rmRes = removeBuddyChunkUnlocked(*node, buddyTargetID, chunkPathStr);
if (!rmRes) // rm failed; stop resync
......@@ -152,7 +152,7 @@ FhgfsOpsErr BuddyResyncerFileSyncSlave::doResync(std::string& chunkPathStr, uint
retVal = FhgfsOpsErr_INTERNAL;
}
}
else
else // error => log and return
{
LogContext(__func__).logErr(
"Open of chunk failed. chunkPath: " + chunkPathStr + "; targetID: "
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment