Skip to content

Commit eddd649

Browse files
committed
[RFC] [WIP] Only unserialize Phar metadata when getMetadata() is called
In other words, don't automatically unserialize when the magic phar:// stream wrappers are used. RFC: https://wiki.php.net/rfc/phar_stop_autoloading_metadata See https://externals.io/message/110856 and https://bugs.php.net/bug.php?id=76774 This was refactored to add a phar_metadata_tracker for the following reasons: - The way to properly copy a zval was previously implicit and undocumented (e.g. is it a pointer to a raw string or an actual value) - Avoid unnecessary serialization and unserialization in the most common case - If a metadata value is serialized once while saving a new/modified phar file, this allows reusing the same serialized string. - Have as few ways to copy/clone/lazily parse metadata (etc.) as possible, so that code changes can be limited to only a few places in the future. - Performance is hopefully not a concern - copying a string should be faster than unserializing a value, and metadata should be rare in most cases. TODO: Add additional tests of edge cases writing phars after setMetadata
1 parent 19bf66a commit eddd649

File tree

10 files changed

+355
-294
lines changed

10 files changed

+355
-294
lines changed

ext/phar/phar.c

Lines changed: 138 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -229,20 +229,7 @@ void phar_destroy_phar_data(phar_archive_data *phar) /* {{{ */
229229
HT_INVALIDATE(&phar->virtual_dirs);
230230
}
231231

232-
if (Z_TYPE(phar->metadata) != IS_UNDEF) {
233-
if (phar->is_persistent) {
234-
if (phar->metadata_len) {
235-
/* for zip comments that are strings */
236-
free(Z_PTR(phar->metadata));
237-
} else {
238-
zval_internal_ptr_dtor(&phar->metadata);
239-
}
240-
} else {
241-
zval_ptr_dtor(&phar->metadata);
242-
}
243-
phar->metadata_len = 0;
244-
ZVAL_UNDEF(&phar->metadata);
245-
}
232+
phar_metadata_tracker_free(&phar->metadata_tracker);
246233

247234
if (phar->fp) {
248235
php_stream_close(phar->fp);
@@ -383,25 +370,7 @@ void destroy_phar_manifest_entry_int(phar_entry_info *entry) /* {{{ */
383370
entry->fp = 0;
384371
}
385372

386-
if (Z_TYPE(entry->metadata) != IS_UNDEF) {
387-
if (entry->is_persistent) {
388-
if (entry->metadata_len) {
389-
/* for zip comments that are strings */
390-
free(Z_PTR(entry->metadata));
391-
} else {
392-
zval_internal_ptr_dtor(&entry->metadata);
393-
}
394-
} else {
395-
zval_ptr_dtor(&entry->metadata);
396-
}
397-
entry->metadata_len = 0;
398-
ZVAL_UNDEF(&entry->metadata);
399-
}
400-
401-
if (entry->metadata_str.s) {
402-
smart_str_free(&entry->metadata_str);
403-
entry->metadata_str.s = NULL;
404-
}
373+
phar_metadata_tracker_free(&entry->metadata_tracker);
405374

406375
pefree(entry->filename, entry->is_persistent);
407376

@@ -600,49 +569,135 @@ int phar_open_parsed_phar(char *fname, size_t fname_len, char *alias, size_t ali
600569
/* }}}*/
601570

602571
/**
603-
* Parse out metadata from the manifest for a single file
604-
*
605-
* Meta-data is in this format:
606-
* [len32][data...]
607-
*
608-
* data is the serialized zval
572+
* Attempt to serialize the data.
573+
* Callers are responsible for handling EG(exception) if one occurs.
609574
*/
610-
int phar_parse_metadata(char **buffer, zval *metadata, uint32_t zip_metadata_len) /* {{{ */
575+
void phar_metadata_tracker_try_ensure_has_serialized_data(phar_metadata_tracker *tracker, int persistent) /* {{{ */
611576
{
612-
php_unserialize_data_t var_hash;
577+
php_serialize_data_t metadata_hash;
578+
smart_str metadata_str = {0};
579+
if (tracker->str || Z_ISUNDEF(tracker->val)) {
580+
/* Already has serialized the value or there is no value */
581+
return;
582+
}
583+
PHP_VAR_SERIALIZE_INIT(metadata_hash);
584+
php_var_serialize(&metadata_str, &tracker->val, &metadata_hash);
585+
PHP_VAR_SERIALIZE_DESTROY(metadata_hash);
586+
if (!metadata_str.s) {
587+
return;
588+
}
589+
if (persistent) {
590+
tracker->str = zend_string_init(ZSTR_VAL(metadata_str.s), ZSTR_LEN(metadata_str.s), persistent);
591+
zend_string_release(metadata_str.s);
592+
} else {
593+
tracker->str = metadata_str.s;
594+
}
595+
}
596+
/* }}} */
613597

614-
if (zip_metadata_len) {
615-
const unsigned char *p;
616-
unsigned char *p_buff = (unsigned char *)estrndup(*buffer, zip_metadata_len);
617-
p = p_buff;
598+
/**
599+
* Parse out metadata when phar_metadata_tracker_has_data is true
600+
*/
601+
int phar_metadata_tracker_unserialize_or_copy(const phar_metadata_tracker *tracker, zval *metadata, int persistent) /* {{{ */
602+
{
603+
if (Z_TYPE(tracker->val) == IS_UNDEF || persistent) {
604+
const unsigned char *start;
605+
php_unserialize_data_t var_hash;
606+
/* precondition: This has data */
607+
ZEND_ASSERT(tracker->str != NULL);
618608
ZVAL_NULL(metadata);
619609
PHP_VAR_UNSERIALIZE_INIT(var_hash);
610+
start = (const unsigned char*)ZSTR_VAL(tracker->str);
620611

621-
if (!php_var_unserialize(metadata, &p, p + zip_metadata_len, &var_hash)) {
622-
efree(p_buff);
612+
if (!php_var_unserialize(metadata, &start, start + ZSTR_LEN(tracker->str), &var_hash)) {
623613
PHP_VAR_UNSERIALIZE_DESTROY(var_hash);
624614
zval_ptr_dtor(metadata);
625615
ZVAL_UNDEF(metadata);
626616
return FAILURE;
627617
}
628-
efree(p_buff);
629618
PHP_VAR_UNSERIALIZE_DESTROY(var_hash);
630-
631-
if (PHAR_G(persist)) {
632-
/* lazy init metadata */
633-
zval_ptr_dtor(metadata);
634-
Z_PTR_P(metadata) = pemalloc(zip_metadata_len, 1);
635-
memcpy(Z_PTR_P(metadata), *buffer, zip_metadata_len);
636-
return SUCCESS;
637-
}
619+
return SUCCESS;
638620
} else {
639-
ZVAL_UNDEF(metadata);
621+
/* TODO: what is the current/expected behavior when fetching an object with getMetadata() and modifying a property? Previously, it was underdefined, and probably unimportant to support */
622+
ZVAL_COPY(metadata, &tracker->val);
640623
}
641624

642625
return SUCCESS;
643626
}
644627
/* }}}*/
645628

629+
/**
630+
* Check if this has any data, serialized or as a raw value.
631+
*/
632+
zend_bool phar_metadata_tracker_has_data(const phar_metadata_tracker *tracker) /* {{{ */
633+
{
634+
return Z_TYPE(tracker->val) != IS_UNDEF || tracker->str != NULL;
635+
}
636+
/* }}} */
637+
638+
/**
639+
* Free memory used to track the metadata and set all fields to be null/undef.
640+
*/
641+
void phar_metadata_tracker_free(phar_metadata_tracker *tracker) /* {{{ */
642+
{
643+
if (Z_TYPE(tracker->val) != IS_UNDEF) {
644+
zval_ptr_dtor(&tracker->val);
645+
ZVAL_UNDEF(&tracker->val);
646+
}
647+
if (tracker->str) {
648+
zend_string_release(tracker->str);
649+
tracker->str = NULL;
650+
}
651+
}
652+
/* }}} */
653+
654+
/**
655+
* Free memory used to track the metadata and set all fields to be null/undef.
656+
*/
657+
void phar_metadata_tracker_copy(phar_metadata_tracker *dest, const phar_metadata_tracker *source) /* {{{ */
658+
{
659+
ZEND_ASSERT(dest != source);
660+
phar_metadata_tracker_free(dest);
661+
662+
if (Z_TYPE(source->val) != IS_UNDEF) {
663+
ZVAL_COPY(&dest->val, &source->val);
664+
}
665+
if (source->str) {
666+
dest->str = zend_string_copy(source->str);
667+
}
668+
}
669+
/* }}} */
670+
671+
/**
672+
* Increment reference counts after a metadata entry was copied
673+
*/
674+
void phar_metadata_tracker_clone(phar_metadata_tracker *tracker) /* {{{ */
675+
{
676+
zval_copy_ctor(&tracker->val);
677+
if (tracker->str) {
678+
tracker->str = zend_string_copy(tracker->str);
679+
}
680+
}
681+
/* }}} */
682+
683+
/**
684+
* Parse out metadata from the manifest for a single file, saving it into a Z_PTR (or setting metadata to Z_UNDEF)
685+
*
686+
* Meta-data is in this format:
687+
* [len32][data...]
688+
*
689+
* data is the serialized zval
690+
*/
691+
void phar_parse_metadata_lazy(const char *buffer, phar_metadata_tracker *tracker, uint32_t zip_metadata_len, int persistent) /* {{{ */
692+
{
693+
phar_metadata_tracker_free(tracker);
694+
if (zip_metadata_len) {
695+
/* lazy init metadata */
696+
tracker->str = zend_string_init(buffer, zip_metadata_len, persistent);
697+
}
698+
}
699+
/* }}}*/
700+
646701
/**
647702
* Size of fixed fields in the manifest.
648703
* See: http://php.net/manual/en/phar.fileformat.phar.php
@@ -1028,7 +1083,6 @@ static int phar_parse_pharfile(php_stream *fp, char *fname, size_t fname_len, ch
10281083
/* check whether we have meta data, zero check works regardless of byte order */
10291084
SAFE_PHAR_GET_32(buffer, endbuffer, len);
10301085
if (mydata->is_persistent) {
1031-
mydata->metadata_len = len;
10321086
if (!len) {
10331087
/* FIXME: not sure why this is needed but removing it breaks tests */
10341088
SAFE_PHAR_GET_32(buffer, endbuffer, len);
@@ -1037,9 +1091,9 @@ static int phar_parse_pharfile(php_stream *fp, char *fname, size_t fname_len, ch
10371091
if(len > (size_t)(endbuffer - buffer)) {
10381092
MAPPHAR_FAIL("internal corruption of phar \"%s\" (trying to read past buffer end)");
10391093
}
1040-
if (phar_parse_metadata(&buffer, &mydata->metadata, len) == FAILURE) {
1041-
MAPPHAR_FAIL("unable to read phar metadata in .phar file \"%s\"");
1042-
}
1094+
/* Don't implicitly call unserialize() on potentially untrusted input unless getMetadata() is called directly. */
1095+
// XXX where is the raw data to unserialize being set
1096+
phar_parse_metadata_lazy(buffer, &mydata->metadata_tracker, len, mydata->is_persistent);
10431097
buffer += len;
10441098

10451099
/* set up our manifest */
@@ -1112,19 +1166,15 @@ static int phar_parse_pharfile(php_stream *fp, char *fname, size_t fname_len, ch
11121166
}
11131167

11141168
PHAR_GET_32(buffer, len);
1115-
if (entry.is_persistent) {
1116-
entry.metadata_len = len;
1117-
} else {
1118-
entry.metadata_len = 0;
1119-
}
11201169
if (len > (size_t)(endbuffer - buffer)) {
11211170
pefree(entry.filename, entry.is_persistent);
11221171
MAPPHAR_FAIL("internal corruption of phar \"%s\" (truncated manifest entry)");
11231172
}
1124-
if (phar_parse_metadata(&buffer, &entry.metadata, len) == FAILURE) {
1125-
pefree(entry.filename, entry.is_persistent);
1126-
MAPPHAR_FAIL("unable to read file metadata in .phar file \"%s\"");
1127-
}
1173+
/* Don't implicitly call unserialize() on potentially untrusted input unless getMetadata() is called directly. */
1174+
/* The same local variable entry is reused in a loop, so reset the state before reading data. */
1175+
ZVAL_UNDEF(&entry.metadata_tracker.val);
1176+
entry.metadata_tracker.str = NULL;
1177+
phar_parse_metadata_lazy(buffer, &entry.metadata_tracker, len, entry.is_persistent);
11281178
buffer += len;
11291179

11301180
entry.offset = entry.offset_abs = offset;
@@ -1133,39 +1183,21 @@ static int phar_parse_pharfile(php_stream *fp, char *fname, size_t fname_len, ch
11331183
switch (entry.flags & PHAR_ENT_COMPRESSION_MASK) {
11341184
case PHAR_ENT_COMPRESSED_GZ:
11351185
if (!PHAR_G(has_zlib)) {
1136-
if (Z_TYPE(entry.metadata) != IS_UNDEF) {
1137-
if (entry.is_persistent) {
1138-
free(Z_PTR(entry.metadata));
1139-
} else {
1140-
zval_ptr_dtor(&entry.metadata);
1141-
}
1142-
}
1186+
phar_metadata_tracker_free(&entry.metadata_tracker);
11431187
pefree(entry.filename, entry.is_persistent);
11441188
MAPPHAR_FAIL("zlib extension is required for gz compressed .phar file \"%s\"");
11451189
}
11461190
break;
11471191
case PHAR_ENT_COMPRESSED_BZ2:
11481192
if (!PHAR_G(has_bz2)) {
1149-
if (Z_TYPE(entry.metadata) != IS_UNDEF) {
1150-
if (entry.is_persistent) {
1151-
free(Z_PTR(entry.metadata));
1152-
} else {
1153-
zval_ptr_dtor(&entry.metadata);
1154-
}
1155-
}
1193+
phar_metadata_tracker_free(&entry.metadata_tracker);
11561194
pefree(entry.filename, entry.is_persistent);
11571195
MAPPHAR_FAIL("bz2 extension is required for bzip2 compressed .phar file \"%s\"");
11581196
}
11591197
break;
11601198
default:
11611199
if (entry.uncompressed_filesize != entry.compressed_filesize) {
1162-
if (Z_TYPE(entry.metadata) != IS_UNDEF) {
1163-
if (entry.is_persistent) {
1164-
free(Z_PTR(entry.metadata));
1165-
} else {
1166-
zval_ptr_dtor(&entry.metadata);
1167-
}
1168-
}
1200+
phar_metadata_tracker_free(&entry.metadata_tracker);
11691201
pefree(entry.filename, entry.is_persistent);
11701202
MAPPHAR_FAIL("internal corruption of phar \"%s\" (compressed and uncompressed size does not match for uncompressed entry)");
11711203
}
@@ -2673,9 +2705,11 @@ int phar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int conv
26732705

26742706
/* compress as necessary, calculate crcs, serialize meta-data, manifest size, and file sizes */
26752707
main_metadata_str.s = NULL;
2676-
if (Z_TYPE(phar->metadata) != IS_UNDEF) {
2708+
if (phar->metadata_tracker.str) {
2709+
smart_str_appendl(&main_metadata_str, ZSTR_VAL(phar->metadata_tracker.str), ZSTR_LEN(phar->metadata_tracker.str));
2710+
} else if (!Z_ISUNDEF(phar->metadata_tracker.val)) {
26772711
PHP_VAR_SERIALIZE_INIT(metadata_hash);
2678-
php_var_serialize(&main_metadata_str, &phar->metadata, &metadata_hash);
2712+
php_var_serialize(&main_metadata_str, &phar->metadata_tracker.val, &metadata_hash);
26792713
PHP_VAR_SERIALIZE_DESTROY(metadata_hash);
26802714
}
26812715
new_manifest_count = 0;
@@ -2710,23 +2744,17 @@ int phar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int conv
27102744
/* we use this to calculate API version, 1.1.1 is used for phars with directories */
27112745
has_dirs = 1;
27122746
}
2713-
if (Z_TYPE(entry->metadata) != IS_UNDEF) {
2714-
if (entry->metadata_str.s) {
2715-
smart_str_free(&entry->metadata_str);
2716-
}
2717-
entry->metadata_str.s = NULL;
2747+
if (Z_TYPE(entry->metadata_tracker.val) != IS_UNDEF && !entry->metadata_tracker.str) {
2748+
/* Assume serialization will succeed. TODO: Set error and throw if EG(exception) != NULL */
2749+
smart_str buf = {0};
27182750
PHP_VAR_SERIALIZE_INIT(metadata_hash);
2719-
php_var_serialize(&entry->metadata_str, &entry->metadata, &metadata_hash);
2751+
php_var_serialize(&buf, &entry->metadata_tracker.val, &metadata_hash);
27202752
PHP_VAR_SERIALIZE_DESTROY(metadata_hash);
2721-
} else {
2722-
if (entry->metadata_str.s) {
2723-
smart_str_free(&entry->metadata_str);
2724-
}
2725-
entry->metadata_str.s = NULL;
2753+
entry->metadata_tracker.str = buf.s;
27262754
}
27272755

27282756
/* 32 bits for filename length, length of filename, manifest + metadata, and add 1 for trailing / if a directory */
2729-
offset += 4 + entry->filename_len + sizeof(entry_buffer) + (entry->metadata_str.s ? ZSTR_LEN(entry->metadata_str.s) : 0) + (entry->is_dir ? 1 : 0);
2757+
offset += 4 + entry->filename_len + sizeof(entry_buffer) + (entry->metadata_tracker.str ? ZSTR_LEN(entry->metadata_tracker.str) : 0) + (entry->is_dir ? 1 : 0);
27302758

27312759
/* compress and rehash as necessary */
27322760
if ((oldfile && !entry->is_modified) || entry->is_dir) {
@@ -2916,6 +2944,7 @@ int phar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int conv
29162944

29172945
/* now write the manifest */
29182946
ZEND_HASH_FOREACH_PTR(&phar->manifest, entry) {
2947+
const zend_string *metadata_str;
29192948
if (entry->is_deleted || entry->is_mounted) {
29202949
/* remove this from the new phar if deleted, ignore if mounted */
29212950
continue;
@@ -2960,11 +2989,12 @@ int phar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int conv
29602989
phar_set_32(entry_buffer+8, entry->compressed_filesize);
29612990
phar_set_32(entry_buffer+12, entry->crc32);
29622991
phar_set_32(entry_buffer+16, entry->flags);
2963-
phar_set_32(entry_buffer+20, entry->metadata_str.s ? ZSTR_LEN(entry->metadata_str.s) : 0);
2992+
metadata_str = entry->metadata_tracker.str;
2993+
phar_set_32(entry_buffer+20, metadata_str ? ZSTR_LEN(metadata_str) : 0);
29642994

29652995
if (sizeof(entry_buffer) != php_stream_write(newfile, entry_buffer, sizeof(entry_buffer))
2966-
|| (entry->metadata_str.s &&
2967-
ZSTR_LEN(entry->metadata_str.s) != php_stream_write(newfile, ZSTR_VAL(entry->metadata_str.s), ZSTR_LEN(entry->metadata_str.s)))) {
2996+
|| (metadata_str &&
2997+
ZSTR_LEN(metadata_str) != php_stream_write(newfile, ZSTR_VAL(metadata_str), ZSTR_LEN(metadata_str)))) {
29682998
if (closeoldfile) {
29692999
php_stream_close(oldfile);
29703000
}

0 commit comments

Comments
 (0)