Skip to content

Commit b4e4c83

Browse files
authored
Merge pull request #957 from catalyst/955-fix-compression
Issue #955: In compressor step, separate moving a file from the gzip command.
2 parents a82176f + 7daedd7 commit b4e4c83

File tree

2 files changed

+115
-41
lines changed

2 files changed

+115
-41
lines changed

classes/local/step/compression_trait.php

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -234,18 +234,17 @@ private function execute_method($config) {
234234
private function execute_gzip($config) {
235235
$gzip = get_config('tool_dataflows', 'gzip_exec_path');
236236
$from = escapeshellarg($config->from);
237-
$to = escapeshellarg($config->to);
238237

239238
$compressionmode = $config->command == 'decompress' ? '-d' : '';
240-
$movefilename = $config->command == 'compress' ? $config->from . '.gz' : rtrim($config->from, '.gz');
241-
$movefilename = escapeshellarg($movefilename);
239+
// File that gzip outputs to.
240+
$gzipoutfile = $config->command == 'compress' ? $config->from . '.gz' : rtrim($config->from, '.gz');
242241

243242
// See https://www.gnu.org/software/gzip/manual/html_node/Invoking-gzip.html.
244243
// -f: force override destination file if it exists
245244
// -v: verbose
246245
// -k: keep input file
247246
// 2>&1: pipe stderror to stdout.
248-
$gzipcommand = "{$gzip} -f -v -k {$compressionmode} {$from} 2>&1 && mv {$movefilename} {$to}";
247+
$gzipcommand = "{$gzip} -f -v -k {$compressionmode} {$from} 2>&1";
249248
$this->log->debug("Command: " . $gzipcommand);
250249

251250
// Execute the gzip command.
@@ -254,6 +253,15 @@ private function execute_gzip($config) {
254253
exec($gzipcommand, $output, $result);
255254
$success = $result === 0;
256255

256+
// Rename to the 'to' filename (if gzip was successful).
257+
if ($success && ($gzipoutfile !== $config->to)) {
258+
$success = $success && rename($gzipoutfile, $config->to);
259+
if (!$success) {
260+
$error = error_get_last();
261+
$output = [$error['message']];
262+
}
263+
}
264+
257265
// Emit in error logs.
258266
if (!$success) {
259267
return implode(PHP_EOL, $output);

tests/tool_dataflows_connector_compression_test.php

Lines changed: 103 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
use Symfony\Component\Yaml\Yaml;
2020
use tool_dataflows\local\execution\engine;
2121
use tool_dataflows\local\step\connector_compression;
22+
use tool_dataflows\local\step\connector_copy_file;
2223

2324
/**
2425
* Unit test for the compression connector step.
@@ -29,9 +30,11 @@
2930
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
3031
* @covers \tool_dataflows\local\step\connector_compression
3132
*/
32-
class tool_dataflows_connector_compression_test extends \advanced_testcase {
33-
/** @var string base test directory for files * */
34-
private $basedir;
33+
final class tool_dataflows_connector_compression_test extends \advanced_testcase {
34+
/** @var string base test directory for files when compressing * */
35+
private $basedircmp;
36+
/** @var string base test directory for files when decompressing * */
37+
private $basedirdecmp;
3538

3639
/**
3740
* Sets up tests
@@ -40,32 +43,46 @@ protected function setUp(): void {
4043
parent::setUp();
4144
$this->resetAfterTest();
4245

43-
$this->basedir = make_unique_writable_directory(make_temp_directory('tool_dataflows'));
44-
set_config('permitted_dirs', $this->basedir, 'tool_dataflows');
46+
$tempdir = make_temp_directory('tool_dataflows');
47+
$this->basedircmp = make_unique_writable_directory($tempdir);
48+
$this->basedirdecmp = make_unique_writable_directory($tempdir);
49+
set_config('permitted_dirs', $this->basedircmp . PHP_EOL . $this->basedirdecmp, 'tool_dataflows');
4550
set_config('gzip_exec_path', '/usr/bin/gzip', 'tool_dataflows');
4651
}
4752

4853
protected function tearDown(): void {
49-
$this->basedir = null;
54+
parent::tearDown();
55+
$this->basedircmp = null;
56+
$this->basedirdecmp = null;
5057
}
5158

5259
/**
5360
* Creates a test dataflow
5461
*
55-
* @param string $from from file
56-
* @param string $tocompressed the destination that $from gets compressed to
57-
* @param string $todecompressed the detination that $tocompressed gets decompressed to
58-
* @param string $method
62+
* This dataflow compresses a file, then copies it, then decompresses the copied file.
63+
*
64+
* @param string $compressfrom Original source file
65+
* @param string $compressto Output file for compression
66+
* @param string $decompressfrom File to be decompressed ($compressto copied)
67+
* @param string $decompressto Output file for decompression
68+
* @param string $method Compression algorithm to use.
69+
* @return dataflow
5970
*/
60-
private function create_test_dataflow(string $from, string $tocompressed, string $todecompressed, string $method) {
71+
private function create_test_dataflow(
72+
string $compressfrom,
73+
string $compressto,
74+
string $decompressfrom,
75+
string $decompressto,
76+
string $method = 'gzip'
77+
): dataflow {
6178
$dataflow = new dataflow();
6279
$dataflow->name = 'compression-connector-test';
6380
$dataflow->save();
6481

6582
$compress = new step();
6683
$compress->config = Yaml::dump([
67-
'from' => $from,
68-
'to' => $tocompressed,
84+
'from' => $compressfrom,
85+
'to' => $compressto,
6986
'method' => $method,
7087
'command' => 'compress',
7188
]);
@@ -75,59 +92,106 @@ private function create_test_dataflow(string $from, string $tocompressed, string
7592

7693
$dataflow->add_step($compress);
7794

95+
// We use a copy so that the decompression files will not interfere with compression files.
96+
$copyfile = new step();
97+
$copyfile->config = Yaml::dump([
98+
'from' => $compressto,
99+
'to' => $decompressfrom,
100+
]);
101+
$copyfile->depends_on([$compress]);
102+
$copyfile->name = 'copyfile';
103+
$copyfile->type = connector_copy_file::class;
104+
105+
$dataflow->add_step($copyfile);
106+
78107
$decompress = new step();
79108
$decompress->config = Yaml::dump([
80-
'from' => $tocompressed,
81-
'to' => $todecompressed,
109+
'from' => $decompressfrom,
110+
'to' => $decompressto,
82111
'method' => $method,
83112
'command' => 'decompress',
84113
]);
85114

86-
$decompress->depends_on([$compress]);
115+
$decompress->depends_on([$copyfile]);
87116
$decompress->name = 'decompress';
88117
$decompress->type = connector_compression::class;
89118

90119
$dataflow->add_step($decompress);
91120
return $dataflow;
92121
}
93122

123+
/**
124+
* Provider for test_gzip_compression_decompression.
125+
*
126+
* @return array[]
127+
*/
128+
public static function gzip_compression_decompression_provider(): array {
129+
return [
130+
'different filenames' => ['input.txt', 'output.txt.gz', 'output_data.txt'],
131+
'same filenames for compress' => ['input.txt', 'input.txt.gz', 'output_data.txt'],
132+
'same filenames for decompress' => ['input.txt', 'output.txt.gz', 'output.txt'],
133+
'same filenames' => ['input.txt', 'input.txt.gz', 'input.txt'],
134+
];
135+
}
136+
94137
/**
95138
* Test compression
139+
*
140+
* @param string $compressfrom Source file name
141+
* @param string $compressto Destination file name for compression. Doubles as the source name for decompression.
142+
* @param string $decompressto Destination file name fro decompression.
143+
* @dataProvider gzip_compression_decompression_provider
96144
*/
97-
public function test_gzip_compression_decompression() {
145+
public function test_gzip_compression_decompression(
146+
string $compressfrom,
147+
string $compressto,
148+
string $decompressto
149+
): void {
98150
// Ensure gzip is installed, otherwise we should skip the test.
99151
if (!is_executable(get_config('tool_dataflows', 'gzip_exec_path'))) {
100152
$this->markTestSkipped('gzip is not installed');
101153
return;
102154
}
103155

104-
$from = $this->basedir . '/input.txt';
105-
$tocompressed = $this->basedir . '/output.txt.gz';
106-
$todecompressed = $this->basedir . '/output_data.txt';
156+
// Source file for compression.
157+
$compressfrom = $this->basedircmp . '/' . $compressfrom;
158+
// Destination file for compression.
159+
$compressto = $this->basedircmp . '/' . $compressto;
160+
// Source file for decompression.
161+
$decompressfrom = $this->basedirdecmp . '/' . $compressto;
162+
// Destination file for decompression.
163+
$decompressto = $this->basedirdecmp . '/' . $decompressto;
107164

108165
$datatowrite = 'testdata';
109-
file_put_contents($from, $datatowrite);
166+
file_put_contents($compressfrom, $datatowrite);
110167

111-
// Input should exist (we just wrote to it), but the output should NOT exist yet.
112-
$this->assertTrue(is_file($from));
113-
$this->assertFalse(is_file($tocompressed));
114-
$this->assertFalse(is_file($todecompressed));
168+
// The original source file should exist (we just wrote to it), but the other files should NOT exist yet.
169+
$this->assertTrue(is_file($compressfrom));
170+
$this->assertFalse(is_file($compressto));
171+
$this->assertFalse(is_file($decompressfrom));
172+
$this->assertFalse(is_file($decompressto));
115173

116-
$dataflow = $this->create_test_dataflow($from, $tocompressed, $todecompressed, 'gzip');
174+
$dataflow = $this->create_test_dataflow(
175+
$compressfrom,
176+
$compressto,
177+
$decompressfrom,
178+
$decompressto
179+
);
117180

118181
ob_start();
119182
$engine = new engine($dataflow, false, false);
120183
$engine->execute();
121184
ob_get_clean();
122185

123-
// Check that the compressed file exists and also that the original file was left intact.
124-
$this->assertTrue(is_file($from));
125-
$this->assertTrue(is_file($tocompressed));
126-
$this->assertTrue(is_file($todecompressed));
186+
// Check that the output files exist and also that the input files were left intact.
187+
$this->assertTrue(is_file($compressfrom));
188+
$this->assertTrue(is_file($compressto));
189+
$this->assertTrue(is_file($decompressfrom));
190+
$this->assertTrue(is_file($decompressto));
127191

128192
// Check that the originally written data ended up the same in the decompressed file.
129-
$decompresseddata = file_get_contents($todecompressed);
130-
$this->assertEquals($datatowrite, $decompresseddata);
193+
$finaldata = file_get_contents($decompressto);
194+
$this->assertEquals($datatowrite, $finaldata);
131195

132196
$vars = $engine->get_variables_root()->get('steps.compress.vars');
133197
$this->assertTrue($vars->success);
@@ -139,17 +203,19 @@ public function test_gzip_compression_decompression() {
139203
/**
140204
* Tests gzip validation
141205
*/
142-
public function test_gzip_validation() {
206+
public function test_gzip_validation(): void {
143207
// Ensure gzip is installed, otherwise we should skip the test.
144208
if (!is_executable(get_config('tool_dataflows', 'gzip_exec_path'))) {
145209
$this->markTestSkipped('gzip is not installed');
146210
return;
147211
}
148212

149-
$from = $this->basedir . '/input.txt';
150-
$to = $this->basedir . '/output.txt.gz';
151-
$todecompressed = $this->basedir . '/output_new.txt';
152-
$dataflow = $this->create_test_dataflow($from, $to, $todecompressed, 'gzip');
213+
$compressfrom = $this->basedircmp . '/input.txt';
214+
$compressto = $this->basedircmp . '/output.txt.gz';
215+
$decompressfrom = $this->basedirdecmp . '/output.txt.gz';
216+
$decompressto = $this->basedirdecmp . '/output_new.txt';
217+
218+
$dataflow = $this->create_test_dataflow($compressfrom, $compressto, $decompressfrom, $decompressto);
153219
$step = $dataflow->get_steps()->compress;
154220

155221
// Initially the default gzip should be executable.

0 commit comments

Comments
 (0)