Commit 89a75bc0 authored by Phoebe Buckheister 🦎

fsck/meta: don't start resync immediately after first repair action

many repair actions may follow after the first has executed, but repair
actions are not modsynced to the secondary while resync is running. set
the secondary of a group to BAD once a repair action is attempted on
the primary, and set all secondaries with repairs to NEEDS-RESYNC when
all repair actions have completed.

see #626

(cherry picked from commit 4ae6ba4c55db22c5cc5ce7a6792e12bd67a29b69)
parent cc5d6d91
......@@ -955,6 +955,19 @@ void ModeCheckFS::checkAndRepair()
return;
}
for (auto it = secondariesSetBad.begin(); it != secondariesSetBad.end(); ++it)
{
auto secondary = *it;
FsckTkEx::fsckOutput(">>> Setting metadata node " + StringTk::intToStr(secondary.val())
+ " to needs-resync", OutputOptions_LINEBREAK);
auto setRes = MsgHelperRepair::setNodeState(secondary, TargetConsistencyState_NEEDS_RESYNC);
if (setRes != FhgfsOpsErr_SUCCESS)
FsckTkEx::fsckOutput(std::string("Failed: ") + FhgfsOpsErrTk::toErrString(setRes),
OutputOptions_LINEBREAK);
}
if(errorCount > 0)
FsckTkEx::fsckOutput(">>> Found " + StringTk::int64ToStr(errorCount) + " errors <<< ",
OutputOptions_ADDLINEBREAKBEFORE | OutputOptions_LINEBREAK);
......@@ -990,7 +1003,8 @@ void ModeCheckFS::repairDanglingDirEntry(db::DirEntry& entry,
case FsckRepairAction_DELETEDENTRY: {
MsgHelperRepair::deleteDanglingDirEntries(fsckEntry.getSaveNodeID(),
fsckEntry.getIsBuddyMirrored(), &entries, &failedEntries);
fsckEntry.getIsBuddyMirrored(), &entries, &failedEntries,
secondariesSetBad);
if(failedEntries.empty() )
{
......@@ -1007,7 +1021,8 @@ void ModeCheckFS::repairDanglingDirEntry(db::DirEntry& entry,
// create mirrored inodes iff the dentry was mirrored. if a contdir with the same id exists,
// a previous check will have created an inode for it, leaving this dentry not dangling.
MsgHelperRepair::createDefDirInodes(fsckEntry.getSaveNodeID(), fsckEntry.getIsBuddyMirrored(),
{std::make_tuple(fsckEntry.getID(), fsckEntry.getIsBuddyMirrored())}, &createdInodes);
{std::make_tuple(fsckEntry.getID(), fsckEntry.getIsBuddyMirrored())}, &createdInodes,
secondariesSetBad);
this->database->getDirInodesTable()->insert(createdInodes);
......@@ -1038,7 +1053,7 @@ void ModeCheckFS::repairWrongInodeOwner(FsckDirInode& inode, UserPrompter& promp
FsckDirInodeList failed;
MsgHelperRepair::correctInodeOwners(inode.getSaveNodeID(), inode.getIsBuddyMirrored(),
&inodes, &failed);
&inodes, &failed, secondariesSetBad);
if(failed.empty() )
this->database->getDirInodesTable()->update(inodes);
......@@ -1073,7 +1088,8 @@ void ModeCheckFS::repairWrongInodeOwnerInDentry(std::pair<db::DirEntry, NumNodeI
FsckDirEntryList failed;
MsgHelperRepair::correctInodeOwnersInDentry(fsckEntry.getSaveNodeID(),
fsckEntry.getIsBuddyMirrored(), &dentries, &owners, &failed);
fsckEntry.getIsBuddyMirrored(), &dentries, &owners, &failed,
secondariesSetBad);
if(failed.empty() )
this->database->getDentryTable()->updateFieldsExceptParent(dentries);
......@@ -1109,7 +1125,7 @@ void ModeCheckFS::repairOrphanedDirInode(FsckDirInode& inode, UserPrompter& prom
FsckDirInodeList failed;
MsgHelperRepair::linkToLostAndFound(*this->lostAndFoundNode, &this->lostAndFoundInfo, &inodes,
&failed, &created);
&failed, &created, secondariesSetBad);
if(failed.empty() )
this->database->getDentryTable()->insert(created);
......@@ -1147,7 +1163,7 @@ void ModeCheckFS::repairOrphanedFileInode(FsckFileInode& inode, UserPrompter& pr
StringList failed;
MsgHelperRepair::deleteFileInodes(inode.getSaveNodeID(), inode.getIsBuddyMirrored(), inodes,
failed);
failed, secondariesSetBad);
if(failed.empty() )
this->database->getFileInodesTable()->remove(inodes);
......@@ -1224,7 +1240,7 @@ void ModeCheckFS::repairMissingContDir(FsckDirInode& inode, UserPrompter& prompt
StringList failed;
MsgHelperRepair::createContDirs(inode.getSaveNodeID(), inode.getIsBuddyMirrored(), &inodes,
&failed);
&failed, secondariesSetBad);
if(failed.empty() )
{
......@@ -1256,7 +1272,8 @@ void ModeCheckFS::repairOrphanedContDir(FsckContDir& dir, UserPrompter& prompt)
case FsckRepairAction_CREATEDEFAULTDIRINODE: {
FsckDirInodeList createdInodes;
MsgHelperRepair::createDefDirInodes(dir.getSaveNodeID(), dir.getIsBuddyMirrored(),
{std::make_tuple(dir.getID(), dir.getIsBuddyMirrored())}, &createdInodes);
{std::make_tuple(dir.getID(), dir.getIsBuddyMirrored())}, &createdInodes,
secondariesSetBad);
this->database->getDirInodesTable()->insert(createdInodes);
......@@ -1288,7 +1305,7 @@ void ModeCheckFS::repairWrongFileAttribs(std::pair<FsckFileInode, checks::InodeA
FsckFileInodeList failed;
MsgHelperRepair::updateFileAttribs(error.first.getSaveNodeID(),
error.first.getIsBuddyMirrored(), &inodes, &failed);
error.first.getIsBuddyMirrored(), &inodes, &failed, secondariesSetBad);
if(failed.empty() )
this->database->getFileInodesTable()->update(inodes);
......@@ -1325,7 +1342,7 @@ void ModeCheckFS::repairWrongDirAttribs(std::pair<FsckDirInode, checks::InodeAtt
error.first.setNumHardLinks(error.second.nlinks);
MsgHelperRepair::updateDirAttribs(error.first.getSaveNodeID(),
error.first.getIsBuddyMirrored(), &inodes, &failed);
error.first.getIsBuddyMirrored(), &inodes, &failed, secondariesSetBad);
if(failed.empty() )
this->database->getDirInodesTable()->update(inodes);
......@@ -1391,7 +1408,7 @@ void ModeCheckFS::repairDirEntryWithBrokenByIDFile(db::DirEntry& entry, UserProm
FsckDirEntryList failed;
MsgHelperRepair::recreateFsIDs(fsckEntry.getSaveNodeID(), fsckEntry.getIsBuddyMirrored(),
&dentries, &failed);
&dentries, &failed, secondariesSetBad);
if(failed.empty() )
{
......@@ -1430,7 +1447,7 @@ void ModeCheckFS::repairOrphanedDentryByIDFile(FsckFsID& id, UserPrompter& promp
FsckFileInodeList createdInodes;
MsgHelperRepair::recreateDentries(id.getSaveNodeID(), id.getIsBuddyMirrored(), &fsIDs,
&failed, &createdDentries, &createdInodes);
&failed, &createdDentries, &createdInodes, secondariesSetBad);
if(failed.empty() )
{
......
......@@ -49,6 +49,8 @@ class ModeCheckFS : public Mode
EntryInfo lostAndFoundInfo;
boost::shared_ptr<FsckDirInode> lostAndFoundInode;
std::set<NumNodeID> secondariesSetBad;
int initDatabase();
void printHeaderInformation();
void disposeUnusedFiles();
......
......@@ -29,6 +29,7 @@
#include <common/net/message/nodes/GetNodesRespMsg.h>
#include <common/net/message/nodes/GetTargetMappingsRespMsg.h>
#include <common/net/message/nodes/GetTargetStatesRespMsg.h>
#include <common/net/message/nodes/SetTargetConsistencyStatesRespMsg.h>
#include <net/message/nodes/HeartbeatMsgEx.h>
// storage messages
......@@ -71,6 +72,7 @@ NetMessage* NetMessageFactory::createFromMsgType(unsigned short msgType)
case NETMSGTYPE_GetMirrorBuddyGroupsResp: { msg = new GetMirrorBuddyGroupsRespMsg(); } break;
case NETMSGTYPE_GetTargetMappingsResp: { msg = new GetTargetMappingsRespMsg(); } break;
case NETMSGTYPE_GetTargetStatesResp: { msg = new GetTargetStatesRespMsg(); } break;
case NETMSGTYPE_SetTargetConsistencyStatesResp: { msg = new SetTargetConsistencyStatesRespMsg(); } break;
// storage messages
case NETMSGTYPE_FindOwnerResp: { msg = new FindOwnerRespMsg(); } break;
......
......@@ -3,6 +3,7 @@
#include <common/Common.h>
#include <common/nodes/Node.h>
#include <common/nodes/TargetStateInfo.h>
#include <common/fsck/FsckChunk.h>
#include <common/fsck/FsckDirEntry.h>
#include <common/fsck/FsckDirInode.h>
......@@ -11,17 +12,23 @@
class MsgHelperRepair
{
public:
static FhgfsOpsErr setNodeState(NumNodeID node, TargetConsistencyState state);
static void deleteDanglingDirEntries(NumNodeID node, bool isBuddyMirrored,
FsckDirEntryList* dentries, FsckDirEntryList* failedDeletes);
FsckDirEntryList* dentries, FsckDirEntryList* failedDeletes,
std::set<NumNodeID>& secondariesWithRepair);
static void createDefDirInodes(NumNodeID node, bool isBuddyMirrored,
const std::vector<std::tuple<std::string, bool>>& entries,
FsckDirInodeList* createdInodes);
FsckDirInodeList* createdInodes,
std::set<NumNodeID>& secondariesWithRepair);
static void correctInodeOwnersInDentry(NumNodeID node, bool isBuddyMirrored,
FsckDirEntryList* dentries, NumNodeIDList* owners, FsckDirEntryList* failedCorrections);
FsckDirEntryList* dentries, NumNodeIDList* owners, FsckDirEntryList* failedCorrections,
std::set<NumNodeID>& secondariesWithRepair);
static void correctInodeOwners(NumNodeID node, bool isBuddyMirrored,
FsckDirInodeList* dirInodes, FsckDirInodeList* failedCorrections);
FsckDirInodeList* dirInodes, FsckDirInodeList* failedCorrections,
std::set<NumNodeID>& secondariesWithRepair);
static void deleteFiles(NumNodeID node, bool isBuddyMirrored, FsckDirEntryList* dentries,
FsckDirEntryList* failedDeletes);
......@@ -32,27 +39,27 @@ class MsgHelperRepair
EntryInfo& outLostAndFoundEntryInfo);
static void linkToLostAndFound(Node& lostAndFoundNode, EntryInfo* lostAndFoundInfo,
FsckDirInodeList* dirInodes, FsckDirInodeList* failedInodes,
FsckDirEntryList* createdDentries);
FsckDirEntryList* createdDentries, std::set<NumNodeID>& secondariesWithRepair);
static void linkToLostAndFound(Node& lostAndFoundNode, EntryInfo* lostAndFoundInfo,
FsckFileInodeList* fileInodes, FsckFileInodeList* failedInodes,
FsckDirEntryList* createdDentries);
FsckDirEntryList* createdDentries, std::set<NumNodeID>& secondariesWithRepair);
static void createContDirs(NumNodeID node, bool isBuddyMirrored, FsckDirInodeList* inodes,
StringList* failedCreates);
StringList* failedCreates, std::set<NumNodeID>& secondariesWithRepair);
static void updateFileAttribs(NumNodeID node, bool isBuddyMirrored, FsckFileInodeList* inodes,
FsckFileInodeList* failedUpdates);
FsckFileInodeList* failedUpdates, std::set<NumNodeID>& secondariesWithRepair);
static void updateDirAttribs(NumNodeID node, bool isBuddyMirrored, FsckDirInodeList* inodes,
FsckDirInodeList* failedUpdates);
FsckDirInodeList* failedUpdates, std::set<NumNodeID>& secondariesWithRepair);
static void recreateFsIDs(NumNodeID node, bool isBuddyMirrored, FsckDirEntryList* dentries,
FsckDirEntryList* failedEntries);
FsckDirEntryList* failedEntries, std::set<NumNodeID>& secondariesWithRepair);
static void recreateDentries(NumNodeID node, bool isBuddyMirrored, FsckFsIDList* fsIDs,
FsckFsIDList* failedCreates, FsckDirEntryList* createdDentries,
FsckFileInodeList* createdInodes);
FsckFileInodeList* createdInodes, std::set<NumNodeID>& secondariesWithRepair);
static void fixChunkPermissions(Node& node, FsckChunkList& chunkList,
PathInfoList& pathInfoList, FsckChunkList& failedChunks);
static bool moveChunk(Node& node, FsckChunk& chunk, const std::string& moveTo,
bool allowOverwrite);
static void deleteFileInodes(NumNodeID node, bool isBuddyMirrored, FsckFileInodeList& inodes,
StringList& failedDeletes);
StringList& failedDeletes, std::set<NumNodeID>& secondariesWithRepair);
private:
......
......@@ -52,9 +52,6 @@ bool CreateDefDirInodesMsgEx::processIncoming(ResponseContext& ctx)
statData.getNumHardlinks(), stripeTargets, FsckStripePatternType_RAID0,
ownerNodeID, isBuddyMirrored, true, false);
createdInodes.push_back(fsckDirInode);
if (isBuddyMirrored)
BuddyCommTk::setBuddyNeedsResyncState(true);
}
else
failedInodeIDs.push_back(inodeID);
......
......@@ -57,9 +57,6 @@ bool CreateEmptyContDirsMsgEx::processIncoming(ResponseContext& ctx)
}
metaStore->releaseDir(dirID);
if (isBuddyMirrored)
BuddyCommTk::setBuddyNeedsResyncState(true);
}
ctx.sendResponse(CreateEmptyContDirsRespMsg(&failedIDs) );
......
......@@ -58,8 +58,6 @@ bool DeleteDirEntriesMsgEx::processIncoming(ResponseContext& ctx)
entryName + "; Err: " + FhgfsOpsErrTk::toErrString(unlinkRes));
failedEntries.push_back(*iter);
}
else if (iter->getIsBuddyMirrored())
BuddyCommTk::setBuddyNeedsResyncState(true);
}
ctx.sendResponse(DeleteDirEntriesRespMsg(&failedEntries) );
......
......@@ -63,8 +63,6 @@ bool FixInodeOwnersInDentryMsgEx::processIncoming(ResponseContext& ctx)
" entryName: " + entryName);
failedEntries.push_back(*dentryIter);
}
else if (dentryIter->getIsBuddyMirrored())
BuddyCommTk::setBuddyNeedsResyncState(true);
if (parentDirInodeIsTemp)
SAFE_DELETE(parentDirInode);
......
......@@ -44,8 +44,6 @@ bool FixInodeOwnersMsgEx::processIncoming(ResponseContext& ctx)
+ entryID);
failedInodes.push_back(*iter);
}
else if (iter->getIsBuddyMirrored())
BuddyCommTk::setBuddyNeedsResyncState(true);
}
ctx.sendResponse(FixInodeOwnersRespMsg(&failedInodes) );
......
......@@ -112,9 +112,6 @@ void LinkToLostAndFoundMsgEx::linkDirInodes(FsckDirInodeList* outFailedInodes,
ownerNodeID, FsckDirEntryType_DIRECTORY, false, localNodeNumID,
saveDevice, saveInode, lostAndFoundInfo->getIsBuddyMirrored());
outCreatedDirEntries->push_back(newFsckDirEntry);
if (iter->getIsBuddyMirrored())
BuddyCommTk::setBuddyNeedsResyncState(true);
}
}
......
......@@ -135,9 +135,6 @@ bool RecreateDentriesMsgEx::processIncoming(ResponseContext& ctx)
+ iter->getID());
}
if (iter->getIsBuddyMirrored())
BuddyCommTk::setBuddyNeedsResyncState(true);
metaStore->releaseDir(dirID);
}
......
......@@ -59,9 +59,6 @@ bool RecreateFsIDsMsgEx::processIncoming(ResponseContext& ctx)
failedEntries.push_back(*iter);
continue;
}
if (iter->getIsBuddyMirrored())
BuddyCommTk::setBuddyNeedsResyncState(true);
}
ctx.sendResponse(RecreateFsIDsRespMsg(&failedEntries) );
......
......@@ -42,8 +42,6 @@ bool RemoveInodesMsgEx::processIncoming(ResponseContext& ctx)
if (rmRes != FhgfsOpsErr_SUCCESS)
failedIDList.push_back(entryID);
else if (isBuddyMirrored)
BuddyCommTk::setBuddyNeedsResyncState(true);
}
ctx.sendResponse(RemoveInodesRespMsg(std::move(failedIDList)));
......
......@@ -44,8 +44,6 @@ bool UpdateDirAttribsMsgEx::processIncoming(ResponseContext& ctx)
"entryID: " + dirID);
failedInodes.push_back(*iter);
}
else if (iter->getIsBuddyMirrored())
BuddyCommTk::setBuddyNeedsResyncState(true);
}
ctx.sendResponse(UpdateDirAttribsRespMsg(&failedInodes) );
......
......@@ -44,8 +44,6 @@ bool UpdateFileAttribsMsgEx::processIncoming(ResponseContext& ctx)
"entryID: " + iter->getID());
failedInodes.push_back(*iter);
}
else if (iter->getIsBuddyMirrored())
BuddyCommTk::setBuddyNeedsResyncState(true);
/* only release it here, as refreshDynAttribs() also takes an inode reference and can
* do the reference from in-memory data then */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment