Skip to content

Commit 3fb4ab3

Browse files
authored
Merge pull request #7601 from tangledbytes/utkarsh/add/migration-wal
[NC | NSFS] WAL based tape migrations and recalls
2 parents f665bd6 + c696970 commit 3fb4ab3

25 files changed

+1523
-83
lines changed

config.js

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,30 @@ config.NSFS_RENAME_RETRIES = 3;
722722
config.NSFS_VERSIONING_ENABLED = true;
723723
config.NSFS_UPDATE_ISSUES_REPORT_ENABLED = true;
724724

725+
config.NSFS_GLACIER_LOGS_DIR = '/var/run/noobaa-nsfs/wal';
726+
config.NSFS_GLACIER_LOGS_MAX_INTERVAL = 15 * 60 * 1000;
727+
728+
// NSFS_GLACIER_ENABLED can override internal autodetection and will force
729+
// the use of restore for all objects.
730+
config.NSFS_GLACIER_ENABLED = false;
731+
config.NSFS_GLACIER_LOGS_ENABLED = true;
732+
config.NSFS_GLACIER_BACKEND = 'TAPECLOUD';
733+
734+
// TAPECLOUD Glacier backend specific configs
735+
config.NSFS_GLACIER_TAPECLOUD_BIN_DIR = '/opt/ibm/tapecloud/bin';
736+
737+
// NSFS_GLACIER_MIGRATE_INTERVAL indicates the interval between runs
738+
// of `manage_nsfs glacier migrate`
739+
config.NSFS_GLACIER_MIGRATE_INTERVAL = 15 * 60 * 1000;
740+
741+
// NSFS_GLACIER_RESTORE_INTERVAL indicates the interval between runs
742+
// of `manage_nsfs glacier restore`
743+
config.NSFS_GLACIER_RESTORE_INTERVAL = 15 * 60 * 1000;
744+
745+
// NSFS_GLACIER_EXPIRY_INTERVAL indicates the interval between runs
746+
// of `manage_nsfs glacier expiry`
747+
config.NSFS_GLACIER_EXPIRY_INTERVAL = 12 * 60 * 60 * 1000;
748+
725749
////////////////////////////
726750
// NSFS NON CONTAINERIZED //
727751
////////////////////////////
@@ -742,11 +766,8 @@ config.BASE_MODE_CONFIG_DIR = 0o700;
742766

743767
config.NSFS_WHITELIST = [];
744768

745-
// NSFS_RESTORE_ENABLED can override internal autodetection and will force
746-
// the use of restore for all objects.
747-
config.NSFS_RESTORE_ENABLED = false;
748-
config.NSFS_HEALTH_ENDPOINT_RETRY_COUNT = 3
749-
config.NSFS_HEALTH_ENDPOINT_RETRY_DELAY = 10
769+
config.NSFS_HEALTH_ENDPOINT_RETRY_COUNT = 3;
770+
config.NSFS_HEALTH_ENDPOINT_RETRY_DELAY = 10;
750771

751772
//Quota
752773
config.QUOTA_LOW_THRESHOLD = 80;
@@ -961,4 +982,4 @@ module.exports.reload_nsfs_nc_config = reload_nsfs_nc_config;
961982
load_nsfs_nc_config();
962983
reload_nsfs_nc_config();
963984
load_config_local();
964-
load_config_env_overrides();
985+
load_config_env_overrides();
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# NSFS Glacier Storage Class
2+
3+
## Goal
4+
- Support "GLACIER" storage class in NooBaa which should behave similar to AWS "GLACIER" storage class.
5+
- NooBaa should allow limited support of `RestoreObject` API.
6+
7+
## Approach
8+
The current approach to support `GLACIER` storage class is to separate the implementation into two parts.
9+
Main NooBaa process only manages metadata on the files/objects via extended attributes and maintains relevant
10+
data in a log file. Another process (currently `manage_nsfs`) manages the actual movements of the files across
11+
disk and tape.
12+
13+
There are 3 primary flows of concern and this document will discuss all 3 of them:
14+
1. Upload object to `GLACIER` storage class (API: `PutObject`).
15+
2. Restore objects that are uploaded to `GLACIER` storage class (API: `RestoreObject`).
16+
3. Copy objects where source is an object stored in `GLACIER` (API: `PutObject`).
17+
18+
### WAL
19+
An important component of all the flows is the write ahead log (WAL). NooBaa has a `SimpleWAL` which, as the name states,
20+
is extremely simple in some senses. It does not deal with fsync issues, partial writes, holes, etc. rather just
21+
appends data separated by a newline character.
22+
23+
`SimpleWAL` features:
24+
1. Exposes an `append` method which adds data to the file.
25+
2. Can perform auto rotation of the file which makes sure that a single WAL is never too huge for the
26+
WAL consumer to consume.
27+
3. Exposes a `process` method which allows "safe" iteration on the previous WAL files.
28+
4. Tries to make sure that no data loss happens due to process level races.
29+
30+
#### Races which are handled by the current implementation
31+
1. `n` processes open the WAL file while a "consumer" swoops and tries to process the file effectively losing the
32+
current writes (due to processing partially written file and ultimately invoking `unlink` on the file) - This isn't
33+
possible as `process` method makes sure that it doesn't iterate over the "current active file".
34+
2. `k` processes out of `n` (such that `k < n`) open the WAL while a "consumer" swoops and tries to process the
35+
file effectively losing the current writes (due to unlinking the file others hold reference to) - Although `process`
36+
method will not protect against this as technically "current active file" is a different file but this is still **not**
37+
possible as the "consumer" needs to have an "EXCLUSIVE" lock on the files before it can process the file; this makes sure
38+
that for as long as any process is writing on the file, the "consumer" cannot consume the file and will block.
39+
3. `k` processes out of `n` (such that `k < n`) open the WAL but before the NSFS process could get a "SHARED" lock on
40+
the file the "consumer" process swoops in and processes the files and then issues `unlink` on the file. The unlink will
41+
not delete the file as `k` processes have open FD to the file but as soon as those processes will be done writing to
42+
it and will close the FD, the file will be deleted which will result in lost writes - This isn't possible as `SimpleWAL`
43+
does not allow writing to a file till it can get a lock on the file and ensure that there are `> 0` links to the file.
44+
If there are no links then it tries to open the file again assuming that the consumer has issued `unlink` on the file
45+
it holds the FD to.
46+
4. Multiple processes try to swap the same file causing issues - This isn't possible as the process needs to acquire
47+
a "swap lock" before it performs the swap which essentially serializes the operations. Further the swapping is done only
48+
once by ensuring that the process only swaps if the current `inode` matches with the `inode` it got when it opened the
49+
file initially, if not it skips the swapping.
50+
51+
### Requirements for `TAPECLOUD` backend
52+
1. Scripts should be placed in `config.NSFS_GLACIER_TAPECLOUD_BIN_DIR` dir.
53+
2. `migrate` script should take a file name and perform migrations of the files mentioned in the given file. The output should comply with `eeadm migrate` command.
54+
3. `recall` script should take a file name and perform recall of the files mentioned in the given file. The output should comply with `eeadm recall` command.
55+
4. `task_show` script should take a task id as argument and output its status. The output should be similar to `eeadm task show -r <id>`.
56+
5. `scan_expired` should take a directory name and dump files in it. The files should have the names of all the files which need to be migrated back to disk. The names should be newline separated.
57+
6. `low_free_space` script should output `true` if the disk has low free space or else should return `false`.
58+
59+
### Flow 1: Upload Object to Glacier
60+
As mentioned earlier, any operation that is related to `GLACIER` is handled in 2 phases. One phase is immediate
61+
which is managed by the NSFS process itself while another phase is something which needs to be invoked separately
62+
which manages the actual movements of the file.
63+
64+
#### Phase 1
65+
1. PutObject is requested with storage class set to `GLACIER`.
66+
2. NooBaa rejects the request if NooBaa isn't configured to support the given storage class. This is **not** enabled
67+
by default and needs to be enabled via `config-local.js` by setting `config.NSFS_GLACIER_ENABLED = true` and `config.NSFS_GLACIER_LOGS_ENABLED = true`.
68+
3. NooBaa will set the storage class to `GLACIER` by setting `user.storage_class` extended attribute.
69+
4. NooBaa creates a simple WAL (Write Ahead Log) and appends the filename to the log file.
70+
5. Completes the upload.
71+
72+
Once the upload is complete, the file sits on the disk till the second process kicks in and actually does the movement
73+
of the file but the main NooBaa process does not concern itself with the actual file state and rather just relies on the
74+
extended attributes to judge the state of the file. The implication of this is that NooBaa will refuse a file read operation
75+
even if the file is on disk unless the user explicitly issues a `RestoreObject` (It should be noted that this is what AWS
76+
does as well).
77+
78+
#### Phase 2
79+
1. A scheduler (eg. Cron, human, script, etc) issues `node src/cmd/manage_nsfs glacier migrate --interval <val>`.
80+
2. The command will first acquire an "EXCLUSIVE" lock so as to ensure that only one tape management command is running at once.
81+
3. Once the process has the lock it will start to iterate over the potentially currently inactive files.
82+
4. Before processing a WAL file, the process will get an "EXCLUSIVE" lock to the file ensuring that it is indeed the only
83+
process processing the file.
84+
5. It will read the WAL one line at a time and will ensure the following:
85+
1. The file still exists.
86+
2. The file still has `GLACIER` storage class. (This can change if the user uploads another object with `STANDARD`
87+
storage class).
88+
3. The file doesn't have any of the `RestoreObject` extended attributes. This is to ensure that if the file was marked
89+
for restoration as soon as it was uploaded then we don't perform the migration at all. This is to avoid unnecessary
90+
work and also make sure that we don't end up racing with ourselves.
91+
6. Once a file name passes through all the above criteria then we add its name to a temporary WAL and hand over the file
92+
name to `migrate` script which should be in `config.NSFS_GLACIER_TAPECLOUD_BIN_DIR` directory. We expect that the script will take the file name as its first parameter and will perform the migration. If the `config.NSFS_GLACIER_BACKEND` is set to `TAPECLOUD` (default) then we expect the script to output data in compliance with `eeadm migrate` command.
93+
7. We delete the temporary WAL that we created.
94+
8. We delete the WAL created by NSFS process **iff** there were no failures in `migrate`. In case of failures we skip the WAL
95+
deletion as a way to retry during the next trigger of the script. It should be noted that NooBaa's `migrate` (`TAPECLOUD` backend) invocation does **not** consider `DUPLICATE TASK` an error.
96+
97+
### Flow 2: Restore Object
98+
As mentioned earlier, any operation that is related to `GLACIER` is handled in 2 phases. One phase is immediate
99+
which is managed by the NSFS process itself while another phase is something which needs to be invoked separately
100+
which manages the actual movements of the file.
101+
102+
#### Phase 1
103+
1. RestoreObject is requested with non-zero positive number of days.
104+
2. NooBaa rejects the request if NooBaa isn't configured to support the given storage class. This is **not** enabled
105+
by default and needs to be enabled via `config-local.js` by setting `config.NSFS_GLACIER_ENABLED = true` and `config.NSFS_GLACIER_LOGS_ENABLED = true`.
106+
3. NooBaa performs a number of checks to ensure that the operation is valid (for example there is no already ongoing
107+
restore request going on etc).
108+
4. NooBaa saves the filename to a simple WAL (Write Ahead Log).
109+
5. Returns the request with success indicating that the restore request has been accepted.
110+
111+
#### Phase 2
112+
1. A scheduler (eg. Cron, human, script, etc) issues `node src/cmd/manage_nsfs glacier restore --interval <val>`.
113+
2. The command will first acquire an "EXCLUSIVE" lock so as to ensure that only one tape management command is running at once.
114+
3. Once the process has the lock it will start to iterate over the potentially currently inactive files.
115+
4. Before processing a WAL file, the process will get an "EXCLUSIVE" lock to the file ensuring that it is indeed the only
116+
process processing the file.
117+
5. It will read the WAL one line at a time and will store the names of the files that we expect to fail during an `eeadm recall`
118+
(this can happen for example because a `RestoreObject` was issued for a file but later on that file was deleted before we could
119+
actually process the file).
120+
6. The WAL is handed over to `recall` script which should be present in `config.NSFS_GLACIER_TAPECLOUD_BIN_DIR` directory. We expect that the script will take the file name as its first parameter and will perform the recall. If the `config.NSFS_GLACIER_BACKEND` is set to `TAPECLOUD` (default) then we expect the script to output data in compliance with `eeadm recall` command.
121+
7. If we get any unexpected failures then we mark it a failure and make sure we do not delete the WAL file (so as to retry later).
122+
8. We iterate over the WAL again to set the final extended attributes. This is to make sure that we can communicate the latest with
123+
the NSFS processes.
124+
125+
### Flow 3: Copy Object with Glacier Object as copy source
126+
This is very similar to Flow 1 with some additional checks.
127+
If the source file is not in `GLACIER` storage class then normal procedure kicks in.
128+
If the source file is in `GLACIER` storage class then:
129+
- NooBaa refuses the copy if the file is not already restored (similar to AWS behaviour).
130+
- NooBaa accepts the copy if the file is already restored (similar to AWS behaviour).
131+

src/cmd/manage_nsfs.js

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@ const ManageCLIError = require('../manage_nsfs/manage_nsfs_cli_errors').ManageCL
1818
const NSFS_CLI_ERROR_EVENT_MAP = require('../manage_nsfs/manage_nsfs_cli_errors').NSFS_CLI_ERROR_EVENT_MAP;
1919
const ManageCLIResponse = require('../manage_nsfs/manage_nsfs_cli_responses').ManageCLIResponse;
2020
const NSFS_CLI_SUCCESS_EVENT_MAP = require('../manage_nsfs/manage_nsfs_cli_responses').NSFS_CLI_SUCCESS_EVENT_MAP;
21+
const manage_nsfs_glacier = require('../manage_nsfs/manage_nsfs_glacier');
2122
const bucket_policy_utils = require('../endpoint/s3/s3_bucket_policy_utils');
2223
const nsfs_schema_utils = require('../manage_nsfs/nsfs_schema_utils');
2324
const { print_usage } = require('../manage_nsfs/manage_nsfs_help_utils');
2425
const { TYPES, ACTIONS, VALID_OPTIONS, OPTION_TYPE,
25-
LIST_ACCOUNT_FILTERS, LIST_BUCKET_FILTERS} = require('../manage_nsfs/manage_nsfs_constants');
26+
LIST_ACCOUNT_FILTERS, LIST_BUCKET_FILTERS, GLACIER_ACTIONS } = require('../manage_nsfs/manage_nsfs_constants');
2627
const NoobaaEvent = require('../manage_nsfs/manage_nsfs_events_utils').NoobaaEvent;
2728

2829
function throw_cli_error(error_code, detail, event_arg) {
@@ -105,6 +106,8 @@ async function main(argv = minimist(process.argv.slice(2))) {
105106
await bucket_management(argv, from_file);
106107
} else if (type === TYPES.IP_WHITELIST) {
107108
await whitelist_ips_management(argv);
109+
} else if (type === TYPES.GLACIER) {
110+
await glacier_management(argv);
108111
} else {
109112
// we should not get here (we check it before)
110113
throw_cli_error(ManageCLIError.InvalidType);
@@ -822,6 +825,8 @@ function validate_type_and_action(type, action) {
822825
if (!Object.values(ACTIONS).includes(action)) throw_cli_error(ManageCLIError.InvalidAction);
823826
} else if (type === TYPES.IP_WHITELIST) {
824827
if (action !== '') throw_cli_error(ManageCLIError.InvalidAction);
828+
} else if (type === TYPES.GLACIER) {
829+
if (!Object.values(GLACIER_ACTIONS).includes(action)) throw_cli_error(ManageCLIError.InvalidAction);
825830
}
826831
}
827832

@@ -838,6 +843,8 @@ function validate_no_extra_options(type, action, input_options) {
838843
valid_options = VALID_OPTIONS.bucket_options[action];
839844
} else if (type === TYPES.ACCOUNT) {
840845
valid_options = VALID_OPTIONS.account_options[action];
846+
} else if (type === TYPES.GLACIER) {
847+
valid_options = VALID_OPTIONS.glacier_options[action];
841848
} else {
842849
valid_options = VALID_OPTIONS.whitelist_options;
843850
}
@@ -942,6 +949,26 @@ function _validate_access_keys(argv) {
942949
})) throw_cli_error(ManageCLIError.AccountSecretKeyFlagComplexity);
943950

944951
}
952+
async function glacier_management(argv) {
953+
const action = argv._[1] || '';
954+
await manage_glacier_operations(action, argv);
955+
}
956+
957+
async function manage_glacier_operations(action, argv) {
958+
switch (action) {
959+
case GLACIER_ACTIONS.MIGRATE:
960+
await manage_nsfs_glacier.process_migrations();
961+
break;
962+
case GLACIER_ACTIONS.RESTORE:
963+
await manage_nsfs_glacier.process_restores();
964+
break;
965+
case GLACIER_ACTIONS.EXPIRY:
966+
await manage_nsfs_glacier.process_expiry();
967+
break;
968+
default:
969+
throw_cli_error(ManageCLIError.InvalidGlacierOperation);
970+
}
971+
}
945972

946973
exports.main = main;
947974
if (require.main === module) main();

src/deploy/NVA_build/standalone_deploy.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,19 @@ function execute() {
4444
fi
4545
}
4646

47+
function sigterm() {
48+
echo "SIGTERM received"
49+
kill -TERM $(jobs -p)
50+
exit 0
51+
}
52+
4753
function main() {
4854
if [ "${STANDALONE_SETUP_ENV}" = "true" ]; then
4955
setup_env
5056
fi
5157

58+
trap sigterm SIGTERM
59+
5260
# Start NooBaa processes
5361
execute "npm run web" web.log
5462
sleep 10

src/manage_nsfs/manage_nsfs_cli_errors.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,12 @@ ManageCLIError.InvalidAccountDistinguishedName = Object.freeze({
246246
message: 'Account distinguished name was not found',
247247
http_code: 400,
248248
});
249+
ManageCLIError.InvalidGlacierOperation = Object.freeze({
250+
code: 'InvalidGlacierOperation',
251+
message: 'only "migrate", "restore" and "expiry" subcommands are supported',
252+
http_code: 400,
253+
});
254+
249255

250256
////////////////////////
251257
//// BUCKET ERRORS /////

src/manage_nsfs/manage_nsfs_constants.js

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
const TYPES = {
55
ACCOUNT: 'account',
66
BUCKET: 'bucket',
7-
IP_WHITELIST: 'whitelist'
7+
IP_WHITELIST: 'whitelist',
8+
GLACIER: 'glacier',
89
};
910

1011
const ACTIONS = {
@@ -15,6 +16,12 @@ const ACTIONS = {
1516
STATUS: 'status'
1617
};
1718

19+
const GLACIER_ACTIONS = {
20+
MIGRATE: 'migrate',
21+
RESTORE: 'restore',
22+
EXPIRY: 'expiry',
23+
};
24+
1825
const GLOBAL_CONFIG_ROOT = 'config_root';
1926
const GLOBAL_CONFIG_OPTIONS = new Set(['from_file', GLOBAL_CONFIG_ROOT, 'config_root_backend']);
2027

@@ -34,11 +41,18 @@ const VALID_OPTIONS_BUCKET = {
3441
'status': new Set(['name', GLOBAL_CONFIG_ROOT]),
3542
};
3643

44+
const VALID_OPTIONS_GLACIER = {
45+
'migrate': new Set([ GLOBAL_CONFIG_ROOT]),
46+
'restore': new Set([ GLOBAL_CONFIG_ROOT]),
47+
'expiry': new Set([ GLOBAL_CONFIG_ROOT]),
48+
};
49+
3750
const VALID_OPTIONS_WHITELIST = new Set(['ips', GLOBAL_CONFIG_ROOT]);
3851

3952
const VALID_OPTIONS = {
4053
account_options: VALID_OPTIONS_ACCOUNT,
4154
bucket_options: VALID_OPTIONS_BUCKET,
55+
glacier_options: VALID_OPTIONS_GLACIER,
4256
whitelist_options: VALID_OPTIONS_WHITELIST,
4357
};
4458

@@ -70,6 +84,7 @@ const LIST_BUCKET_FILTERS = ['name'];
7084
// EXPORTS
7185
exports.TYPES = TYPES;
7286
exports.ACTIONS = ACTIONS;
87+
exports.GLACIER_ACTIONS = GLACIER_ACTIONS;
7388
exports.VALID_OPTIONS = VALID_OPTIONS;
7489
exports.OPTION_TYPE = OPTION_TYPE;
7590

0 commit comments

Comments
 (0)