From bb2de67c310d52dcdcfeed8ab685172bc2623ce0 Mon Sep 17 00:00:00 2001 From: janwas Date: Sat, 18 Dec 2004 14:45:04 +0000 Subject: [PATCH] fixed incorrect handling of extra fields (zip archives with extended file attributes failed to load) improved z_extract_cdfh/z_enum_files. This was SVN commit r1527. --- source/lib/res/zip.cpp | 277 +++++++++++++++++++---------------------- 1 file changed, 128 insertions(+), 149 deletions(-) diff --git a/source/lib/res/zip.cpp b/source/lib/res/zip.cpp index 2003e10783..9a2d579303 100755 --- a/source/lib/res/zip.cpp +++ b/source/lib/res/zip.cpp @@ -95,6 +95,7 @@ struct ZLoc static const char cdfh_id[] = "PK\1\2"; static const char lfh_id[] = "PK\3\4"; static const char ecdr_id[] = "PK\5\6"; +// lengths include the id field! const size_t CDFH_SIZE = 46; const size_t LFH_SIZE = 30; const size_t ECDR_SIZE = 22; @@ -102,7 +103,7 @@ const size_t ECDR_SIZE = 22; // return -1 if file is obviously not a valid Zip archive, // otherwise 0. used as early-out test in lookup_init (see call site). -static inline int z_validate(const void* file, size_t size) +static inline int z_validate(const u8* file, size_t size) { // make sure it's big enough to check the header and for // z_find_ecdr to succeed (if smaller, it's definitely bogus). @@ -114,39 +115,59 @@ static inline int z_validate(const void* file, size_t size) } +// scan for and return a pointer to a Zip record, or 0 if not found. +// is the expected position; we scan from there until EOF for +// the given ID (fourcc). (includes ID field) bytes must +// remain before EOF - this makes sure the record is completely in the file. +// used by z_find_ecdr and z_extract_cdfh. +static const u8* z_find_id(const u8* file, size_t size, const u8* start, const char id[5], size_t record_size) +{ + ssize_t bytes_left = (ssize_t)((file+size) - start - record_size); + + const u8* p = start; + // don't increment function argument directly, + // so we can warn the user if we had to scan. + + while(bytes_left-- >= 0) + { + // found it + if(*(u32*)p == *(u32*)id) + { +#ifndef NDEBUG + if(p != start) + debug_warn("z_find_id: archive damaged, but still found next record."); +#endif + return p; + } + + p++; + // be careful not to increment before comparison; + // id may already be at . + } + + // passed EOF, didn't find it. + debug_warn("z_find_id: archive corrupted, next record not found."); + return 0; +} + + // find "End of Central Dir Record" in file. // z_validate has made sure size >= ECDR_SIZE. // return -1 on failure (output param invalid), otherwise 0. -static int z_find_ecdr(const void* file, size_t size, const u8*& ecdr_) +static int z_find_ecdr(const u8* file, size_t size, const u8*& ecdr_) { // early out: check expected case (ECDR at EOF; no file comment) - const u8* ecdr = (const u8*)file + size - ECDR_SIZE; + const u8* ecdr = file + size - ECDR_SIZE; if(*(u32*)ecdr == *(u32*)&ecdr_id) goto found_ecdr; - { - // scan the last 66000 bytes of file for ecdr_id signature // (the Zip archive comment field, up to 64k, may follow ECDR). // if the zip file is < 66000 bytes, scan the whole file. - - size_t bytes_left = MIN(66000, size); - ecdr = (const u8*)file + size - bytes_left; - - while(bytes_left >= 4) - { - if(*(u32*)ecdr == *(u32*)&ecdr_id) - goto found_ecdr; - - // check next 4 bytes (unaligned!!) - ecdr++; - bytes_left--; - } - - // reached EOF and still haven't found the ECDR identifier. - return ERR_CORRUPTED; - - } + const u8* start = file + size - MIN(66000, size); + ecdr = z_find_id(file, size, start, ecdr_id, ECDR_SIZE); + if(!ecdr) + return ERR_CORRUPTED; found_ecdr: ecdr_ = ecdr; @@ -154,37 +175,6 @@ found_ecdr: } -#ifdef PARANOIA - -// make sure the LFH fields match those passed (from the CDFH). -// only used in PARANOIA builds - costs time when opening archives. -// return -1 on error or mismatch, otherwise 0. -static int z_verify_lfh(const void* file, const off_t lfh_ofs, const off_t file_ofs) -{ - assert(lfh_ofs < file_ofs); // header comes before file - - const u8* lfh = (const u8*)file + lfh_ofs; - - // LFH signature doesn't match - if(*(u32*)lfh != *(u32*)lfh_id) - return ERR_CORRUPTED; - - const u16 lfh_fn_len = read_le16(lfh+26); - const u16 lfh_e_len = read_le16(lfh+28); - - const off_t lfh_file_ofs = lfh_ofs + LFH_SIZE + lfh_fn_len + lfh_e_len; - - // CDFH and LFH are inconsistent => - // normal builds would return incorrect offsets. - if(file_ofs != lfh_file_ofs) - return ERR_CORRUPTED; - - return 0; -} - -#endif // #ifdef PARANOIA - - // // date conversion from DOS to Unix // @@ -203,7 +193,7 @@ static uint bits(uint num, uint lo_idx, uint hi_idx) static time_t convert_dos_date(u16 fatdate, u16 fattime) { - struct tm t; + struct tm t; // struct tm format: t.tm_sec = bits(fattime, 0,4) * 2; // [0,59] t.tm_min = bits(fattime, 5,10); // [0,59] t.tm_hour = bits(fattime, 11,15); // [0,23] @@ -212,6 +202,9 @@ static time_t convert_dos_date(u16 fatdate, u16 fattime) t.tm_year = bits(fatdate, 9,15) + 80; // since 1900 t.tm_isdst = -1; // unknown - let libc determine + assert(t.tm_year < 138); + // otherwise: totally bogus, and at the limit of 32-bit time_t + time_t ret = mktime(&t); if(ret == (time_t)-1) debug_warn("convert_dos_date: mktime failed"); @@ -222,32 +215,31 @@ static time_t convert_dos_date(u16 fatdate, u16 fattime) /////////////////////////////////////////////////////////////////////////////// -// if cdfh is valid and describes a file, extract its name, offset and size -// for use in z_enum_files (passes it to lookup). -// return -1 on error (output params invalid), or 0 on success. -static int z_extract_cdfh(const u8* cdfh, ssize_t bytes_left, const char*& fn, size_t& fn_len, ZLoc* loc) +enum z_extract_cdfh_ret { - // sanity check: did we even read the CDFH? - if(bytes_left < CDFH_SIZE) - { - debug_warn("z_extract_cdfh: CDFH not in buffer!"); - return -1; - } + Z_CDFH_ABORT = -1, // next CDFH not found; abort. + Z_CDFH_FILE_OK = 0, // valid file; add to lookup. + Z_CDFH_SKIPPED = 1 // not valid file, but have next CDFH; continue. +}; - // this is checked when advancing, - // but we need this for the first central dir entry. - if(*(u32*)cdfh != *(u32*)cdfh_id) - { - debug_warn("z_extract_cdfh: CDFH signature not found"); - return -1; - } +// read the current CDFH. if a valid file, return its filename and ZLoc. +// finally, advance to next CDFH. +// return -1 on error (output params invalid), or 0 on success. +// called by z_enum_files, which passes the output to lookup. +static int z_extract_cdfh(const u8* file, size_t size, // in + const u8*& cdfh, const char*& fn, size_t& fn_len, ZLoc* loc) // out +{ + // scan for next CDFH (at or beyond current cdfh position) + cdfh = z_find_id(file, size, cdfh, cdfh_id, CDFH_SIZE); + if(!cdfh) // no (further) CDFH found: + return Z_CDFH_ABORT; // caller will abort. // extract fields from CDFH - const u8 method = cdfh[10]; + const u16 method = read_le16(cdfh+10); const u16 fattime = read_le16(cdfh+12); const u16 fatdate = read_le16(cdfh+14); - const u32 csize_ = read_le32(cdfh+20); - const u32 ucsize_ = read_le32(cdfh+24); + const u32 csize = read_le32(cdfh+20); + const u32 ucsize = read_le32(cdfh+24); const u16 fn_len_ = read_le16(cdfh+28); const u16 e_len = read_le16(cdfh+30); const u16 c_len = read_le16(cdfh+32); @@ -255,40 +247,43 @@ static int z_extract_cdfh(const u8* cdfh, ssize_t bytes_left, const char*& fn, s const char* fn_ = (const char*)cdfh+CDFH_SIZE; // not 0-terminated! - // - // check if valid and data should actually be returned - // + // find corresponding LFH, needed to calculate file offset + // (its extra field may not match that reported by CDFH!). + // TODO: this is slow, due to seeking backwards. + // optimization: calculate only on demand (i.e. open, not mount)? + const u8* lfh = z_find_id(file, size, (u8*)file+lfh_ofs, lfh_id, LFH_SIZE); - // .. compression method is unknown (neither deflated nor stored) - if(method & ~8) - { - debug_warn("z_extract_cdfh: unknown compression method"); - return ERR_NOT_SUPPORTED; - } - // .. this is a directory entry; we only want files. - if(!csize_ && !ucsize_) - return -1; -#ifdef PARANOIA - // .. CDFH's file ofs doesn't match that reported by LFH. - // don't check this in normal builds - seeking between LFHs and - // central dir is slow. this happens if the headers differ for some - // reason; we'd notice anyway, because inflate will fail - // (since file offset is incorrect). - if(z_verify_lfh(file, lfh_ofs, file_ofs) != 0) - return -1; -#endif + // advance CDFH; we now know where the next CDFH entry should be, + // but will still scan ahead for its id on next call. + cdfh += CDFH_SIZE + fn_len_ + e_len + c_len; + // is this entry not a valid file? + if( + // compression method is unknown (neither deflated nor stored) + (method & ~8) || + // it's a directory entry (we only want files). + (!csize && !ucsize) || + // LFH signature not found + (!lfh) + ) + return Z_CDFH_SKIPPED; + + // get actual file ofs (see above) + const u16 lfh_fn_len = read_le16(lfh+26); + const u16 lfh_e_len = read_le16(lfh+28); + const off_t file_ofs = lfh_ofs + LFH_SIZE + lfh_fn_len + lfh_e_len; + // LFH doesn't have a comment field! // write out entry data fn = fn_; fn_len = fn_len_; - loc->ofs = (off_t)(lfh_ofs + LFH_SIZE + fn_len_ + e_len); - loc->csize = (off_t)(method? csize_ : 0); + loc->ofs = file_ofs; + loc->csize = (off_t)(method? csize : 0); // if not compressed, csize = 0 (see zfile_compressed) - loc->ucsize = (off_t)ucsize_; + loc->ucsize = (off_t)ucsize; loc->mtime = convert_dos_date(fatdate, fattime); - return 0; + return Z_CDFH_FILE_OK; } @@ -307,7 +302,7 @@ static int z_extract_cdfh(const u8* cdfh, ssize_t bytes_left, const char*& fn, s // loc is only valid during the callback! must be copied or saved. typedef int(*CDFH_CB)(uintptr_t user, i32 idx, const char* fn, size_t fn_len, const ZLoc* loc); -static int z_enum_files(const void* file, const size_t size, const CDFH_CB cb, const uintptr_t user) +static int z_enum_files(const u8* file, const size_t size, const CDFH_CB cb, const uintptr_t user) { // find "End of Central Directory Record" const u8* ecdr; @@ -330,44 +325,25 @@ static int z_enum_files(const void* file, const size_t size, const CDFH_CB cb, c i32 idx = 0; // only incremented when valid, so we don't leave holes // in lookup's arrays (bad locality). - i32 entries_left = num_entries; - for(;;) - { - entries_left--; - ssize_t bytes_left = (ssize_t)size - ( cdfh - (u8*)file ); + for(i32 i = 0; i < num_entries; i++) + { const char* fn; size_t fn_len; ZLoc loc; - // CDFH is valid and of a file - if(z_extract_cdfh(cdfh, bytes_left, fn, fn_len, &loc) == 0) + int ret = z_extract_cdfh(file, size, cdfh, fn, fn_len, &loc); + // valid file + if(ret == Z_CDFH_FILE_OK) { cb(user, idx, fn, fn_len, &loc); idx++; // see rationale above - - // advance to next cdfh (the easy way - we have a valid fn_len - // and assume there's no extra data stored after the header). - cdfh += CDFH_SIZE + fn_len; - if(*(u32*)cdfh == *(u32*)cdfh_id) - goto found_next_cdfh; - - // not found; scan for it below (as if the CDFH were invalid). - // note: don't restore the previous cdfh pointer - fn_len is - // correct and there are additional fields before the next CDFH. } - - if(!entries_left) - break; - - // scan for the next CDFH (its signature) - for(ssize_t i = 0; i < bytes_left - (ssize_t)CDFH_SIZE; i++) - if(*(u32*)(++cdfh) == *(u32*)cdfh_id) - goto found_next_cdfh; - - debug_warn("z_enum_files: next CDFH not found"); - return -1; - -found_next_cdfh:; + // next CDFH not found (Zip archive corrupted) + else if(ret == Z_CDFH_ABORT) + return -1; + // skipping this CDFH (e.g. if directory) + else + ; } return 0; @@ -436,28 +412,28 @@ struct LookupInfo }; -// support for case-insensitive filenames: the hash of each -// filename string is saved in lookup_add_file_cb and searched for by -// lookup_get_file_info. in both cases, we convert a temporary to -// lowercase before hashing it. -static void strcpy_lower(char* dst, const char* src) +// write a lower-case copy of to , which holds bytes. +// up to buf_size-1 chars are written; we always 0-terminate the output! +// +// this routine is used to convert OS and user-specified filenames +// to lowercase before hashing them and then comparing. +static void copy_lower_case(char* dst, const char* src, size_t buf_size) { + assert(buf_size > 0); // otherwise, no room for trailing '\0' + int c; do { c = *src++; + // this is the last remaining byte in the buffer. + // loop will exit below after writing 0-terminator. + if(--buf_size == 0) + c = '\0'; *dst++ = tolower(c); } while(c != '\0'); } -static void strncpy_lower(char* dst, const char* src, size_t count) -{ - int n = (int)count; - while (--n >= 0) - *dst++ = tolower(*src++); -} - // add file to the lookup data structure. // called from z_enum_files in order (0 <= idx < num_entries). @@ -499,8 +475,11 @@ static int lookup_add_file_cb(uintptr_t user, i32 idx, // hash (lower case!) filename char lc_fn[PATH_MAX]; - strncpy_lower(lc_fn, fn, fn_len); - FnHash fn_hash = fnv_hash(lc_fn, fn_len); + size_t max_size = fn_len+1; // fn not 0-terminated + if(max_size > PATH_MAX) // (this avoids stupid min() type warning) + max_size = PATH_MAX; // clamp to actual buffer size + copy_lower_case(lc_fn, fn, max_size); + FnHash fn_hash = fnv_hash(lc_fn); // fill ZEnt ZEnt* ent = li->ents + idx; @@ -524,7 +503,7 @@ static int lookup_add_file_cb(uintptr_t user, i32 idx, // initialize lookup data structure for the given Zip archive: // adds all files to the index. -static int lookup_init(LookupInfo* li, const void* file, const size_t size) +static int lookup_init(LookupInfo* li, const u8* file, const size_t size) { int err; @@ -570,12 +549,12 @@ static int lookup_free(LookupInfo* li) } -// look up ZLoc, given filename. +// look up ZLoc, given filename (untrusted!). static int lookup_get_file_info(LookupInfo* li, const char* fn, ZLoc* loc) { // hash (lower case!) filename char lc_fn[PATH_MAX]; - strcpy_lower(lc_fn, fn); + copy_lower_case(lc_fn, fn, sizeof(lc_fn)); const FnHash fn_hash = fnv_hash(lc_fn); const FnHash* fn_hashes = li->fn_hashes; @@ -696,7 +675,7 @@ static int ZArchive_reload(ZArchive* za, const char* fn, Handle) if(err < 0) goto exit_close; - err = lookup_init(&za->li, file, size); + err = lookup_init(&za->li, (u8*)file, size); if(err < 0) goto exit_unmap_close;