diff --git a/composer.json b/composer.json index 487361b7..5c9aeb01 100644 --- a/composer.json +++ b/composer.json @@ -29,7 +29,6 @@ }, "files": [ "src/WordPress/Blueprints/functions.php", - "src/WordPress/Zip/functions.php", "src/WordPress/Streams/stream_str_replace.php" ] }, diff --git a/src/WordPress/Blueprints/Runner/Step/UnzipStepRunner.php b/src/WordPress/Blueprints/Runner/Step/UnzipStepRunner.php index f3d7db64..3ccc4d93 100644 --- a/src/WordPress/Blueprints/Runner/Step/UnzipStepRunner.php +++ b/src/WordPress/Blueprints/Runner/Step/UnzipStepRunner.php @@ -4,7 +4,6 @@ use WordPress\Blueprints\Model\DataClass\UnzipStep; use WordPress\Blueprints\Progress\Tracker; -use function WordPress\Zip\zip_extract_to; class UnzipStepRunner extends BaseStepRunner { @@ -22,6 +21,6 @@ public function run( $progress_tracker->set( 10, 'Unzipping...' ); $resolved_to_path = $this->getRuntime()->resolvePath( $input->extractToPath ); - zip_extract_to( $this->getResource( $input->zipFile ), $resolved_to_path ); + throw new \Exception("Not implemented at the moment. Needs to be updated to use the new ZipStreamReader API."); } } diff --git a/src/WordPress/ByteReader/WP_Byte_Reader.php b/src/WordPress/ByteReader/WP_Byte_Reader.php new file mode 100644 index 00000000..75ee7d88 --- /dev/null +++ b/src/WordPress/ByteReader/WP_Byte_Reader.php @@ -0,0 +1,26 @@ +next_bytes() ) { + $buffer .= $this->get_bytes(); + } + if( $this->get_last_error() ) { + return false; + } + return $buffer; + } +} diff --git a/src/WordPress/ByteReader/WP_File_Reader.php b/src/WordPress/ByteReader/WP_File_Reader.php new file mode 100644 index 00000000..43da1893 --- /dev/null +++ b/src/WordPress/ByteReader/WP_File_Reader.php @@ -0,0 +1,109 @@ +file_path = $file_path; + $this->chunk_size = $chunk_size; + } + + public function length(): ?int { + return filesize( $this->file_path ); + } + + public function tell(): int { + // Save the previous offset, not the current one. 
+ // This way, after resuming, the next read will yield the same $output_bytes + // as we have now. + return $this->offset_in_file - $this->last_chunk_size; + } + + public function seek( $offset_in_file ): bool { + if ( ! is_int( $offset_in_file ) ) { + _doing_it_wrong( __METHOD__, 'Cannot set a file reader cursor to a non-integer offset.', '1.0.0' ); + return false; + } + $this->offset_in_file = $offset_in_file; + $this->last_chunk_size = 0; + $this->output_bytes = ''; + if ( $this->file_pointer ) { + if ( false === fseek( $this->file_pointer, $this->offset_in_file ) ) { + return false; + } + } + return true; + } + + public function close() { + if(!$this->file_pointer) { + return false; + } + if(!fclose($this->file_pointer)) { + $this->last_error = 'Failed to close file pointer'; + return false; + } + $this->file_pointer = null; + $this->state = static::STATE_FINISHED; + return true; + } + + public function is_finished(): bool { + return ! $this->output_bytes && $this->state === static::STATE_FINISHED; + } + + public function get_bytes(): string { + return $this->output_bytes; + } + + public function get_last_error(): ?string { + return $this->last_error; + } + + public function next_bytes(): bool { + $this->output_bytes = ''; + $this->last_chunk_size = 0; + if ( $this->last_error || $this->is_finished() ) { + return false; + } + if ( ! $this->file_pointer ) { + $this->file_pointer = fopen( $this->file_path, 'r' ); + if ( $this->offset_in_file ) { + fseek( $this->file_pointer, $this->offset_in_file ); + } + } + $bytes = fread( $this->file_pointer, $this->chunk_size ); + if ( ! 
$bytes && feof( $this->file_pointer ) ) { + return false; + } + $this->last_chunk_size = strlen( $bytes ); + $this->offset_in_file += $this->last_chunk_size; + $this->output_bytes .= $bytes; + return true; + } +} diff --git a/src/WordPress/ByteReader/WP_GZ_File_Reader.php b/src/WordPress/ByteReader/WP_GZ_File_Reader.php new file mode 100644 index 00000000..1acb729e --- /dev/null +++ b/src/WordPress/ByteReader/WP_GZ_File_Reader.php @@ -0,0 +1,28 @@ +output_bytes = ''; + if ( $this->last_error || $this->is_finished() ) { + return false; + } + if ( ! $this->file_pointer ) { + $this->file_pointer = gzopen( $this->file_path, 'r' ); + if ( $this->offset_in_file ) { + gzseek( $this->file_pointer, $this->offset_in_file ); + } + } + $bytes = gzread( $this->file_pointer, $this->chunk_size ); + if ( ! $bytes && gzeof( $this->file_pointer ) ) { + gzclose( $this->file_pointer ); + $this->state->finish(); + return false; + } + $this->offset_in_file += strlen( $bytes ); + $this->output_bytes .= $bytes; + return true; + } +} diff --git a/src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php b/src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php new file mode 100644 index 00000000..c8c72d9f --- /dev/null +++ b/src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php @@ -0,0 +1,205 @@ +seek(0); + * $file->request_bytes(100); + * while($file->next_chunk()) { + * var_dump($file->get_bytes()); + * } + * $file->seek(600); + * $file->request_bytes(40); + * while($file->next_chunk()) { + * var_dump($file->get_bytes()); + * } + * + * @TODO: Abort in-progress requests when seeking to a new offset. 
+ */ +class WP_Remote_File_Ranged_Reader extends WP_Byte_Reader { + + private $url; + private $remote_file_length; + + private $current_reader; + private $offset_in_remote_file = 0; + private $default_expected_chunk_size = 10 * 1024; // 10 KB + private $expected_chunk_size = 10 * 1024; // 10 KB + private $stop_after_chunk = false; + + /** + * Creates a seekable reader for the remote file. + * Detects support for range requests and falls back to saving the entire + * file to disk when the remote server does not support range requests. + */ + static public function create( $url ) { + $remote_file_reader = new WP_Remote_File_Ranged_Reader( $url ); + /** + * We don't **need** the content-length header to be present. + * + * However, this reader is only used to read remote ZIP files, + * we do need to know the length of the file to be able to read + * the central directory index. + * + * Let's revisit this check once we need to read other types of + * files. + */ + if(false === $remote_file_reader->length()) { + return self::save_to_disk( $url ); + } + + /** + * Try to read the first two bytes of the file to confirm that + * the remote server supports range requests. + */ + $remote_file_reader->seek_to_chunk(0, 2); + if(false === $remote_file_reader->next_bytes()) { + return self::save_to_disk( $url ); + } + + $bytes = $remote_file_reader->get_bytes(); + if(strlen($bytes) !== 2) { + // Oops! We're streaming the entire file to disk now. Let's + // redirect the output to a local file and provide the caller + // with a regular file reader. + return self::redirect_output_to_disk( $remote_file_reader ); + } + + // The remote server supports range requests, good! We can use this reader. + // Let's return to the beginning of the file before returning. 
+ $remote_file_reader->seek(0); + return $remote_file_reader; + } + + static private function save_to_disk( $url ) { + $remote_file_reader = new WP_Remote_File_Reader( $url ); + return self::redirect_output_to_disk( $remote_file_reader ); + } + + static private function redirect_output_to_disk( WP_Byte_Reader $reader ) { + $file_path = tempnam(sys_get_temp_dir(), 'wp-remote-file-reader-') . '.epub'; + $file = fopen($file_path, 'w'); + // We may have a bytes chunk available at this point. + if($reader->get_bytes()) { + fwrite($file, $reader->get_bytes()); + } + // Keep streaming the file until we're done. + while($reader->next_bytes()) { + fwrite($file, $reader->get_bytes()); + } + fclose($file); + if($reader->get_last_error()) { + // How should we log this error? + return false; + } + return WP_File_Reader::create( $file_path ); + } + + public function __construct( $url ) { + $this->url = $url; + } + + public function next_bytes(): bool { + while( true ) { + if ( null === $this->current_reader ) { + $this->create_reader(); + } + // Advance the offset by the length of the current chunk. + if ( $this->current_reader->get_bytes() ) { + $this->offset_in_remote_file += strlen( $this->current_reader->get_bytes() ); + } + + // We've reached the end of the remote file, we're done. + if ( $this->offset_in_remote_file >= $this->length() - 1 ) { + return false; + } + + // We've reached the end of the current chunk, request the next one. + if ( false === $this->current_reader->next_bytes() ) { + if ( $this->stop_after_chunk ) { + return false; + } + $this->current_reader = null; + continue; + } + + // We've got a chunk, return it. 
+ return true; + } + } + + public function length() { + $this->ensure_content_length(); + if ( null === $this->remote_file_length ) { + return false; + } + return $this->remote_file_length; + } + + private function create_reader() { + $this->current_reader = new WP_Remote_File_Reader( + $this->url, + array( + 'headers' => array( + // @TODO: Detect when the remote server doesn't support range requests, + // do something sensible. We could either stream the entire file, + // or give up. + 'Range' => 'bytes=' . $this->offset_in_remote_file . '-' . ( + $this->offset_in_remote_file + $this->expected_chunk_size - 1 + ), + ), + ) + ); + } + + public function seek_to_chunk($offset, $length) { + $this->current_reader->seek($offset); + $this->expected_chunk_size = $length; + $this->stop_after_chunk = true; + } + + public function seek( $offset ): bool { + $this->offset_in_remote_file = $offset; + // @TODO cancel any pending requests + $this->current_reader = null; + $this->expected_chunk_size = $this->default_expected_chunk_size; + $this->stop_after_chunk = false; + return true; + } + + public function tell(): int { + return $this->offset_in_remote_file; + } + + public function is_finished(): bool { + return false; + } + + public function get_bytes(): ?string { + return $this->current_reader->get_bytes(); + } + + public function get_last_error(): ?string { + // @TODO: Preserve the error information when the current reader + // is reset. 
+ return $this->current_reader->get_last_error(); + } + + private function ensure_content_length() { + if ( null !== $this->remote_file_length ) { + return $this->remote_file_length; + } + if(null === $this->current_reader) { + $this->current_reader = new WP_Remote_File_Reader( $this->url ); + } + $this->remote_file_length = $this->current_reader->length(); + return $this->remote_file_length; + } + +} diff --git a/src/WordPress/ByteReader/WP_Remote_File_Reader.php b/src/WordPress/ByteReader/WP_Remote_File_Reader.php new file mode 100644 index 00000000..3c7ab643 --- /dev/null +++ b/src/WordPress/ByteReader/WP_Remote_File_Reader.php @@ -0,0 +1,182 @@ +client = new \WordPress\AsyncHttp\Client(); + $this->url = $url; + $this->headers = $headers; + } + + public function tell(): int { + return $this->bytes_already_read + $this->skip_bytes; + } + + public function seek( $offset_in_file ): bool { + if ( $this->request ) { + _doing_it_wrong( + __METHOD__, + 'Cannot seek() a WP_Remote_File_Reader instance once the request was initialized. ' . + 'Use WP_Remote_File_Ranged_Reader to seek() using range requests instead.', + '1.0.0' + ); + return false; + } + $this->skip_bytes = $offset_in_file; + return true; + } + + public function next_bytes(): bool { + if ( null === $this->request ) { + $this->request = new \WordPress\AsyncHttp\Request( + $this->url, + array( 'headers' => $this->headers ) + ); + if ( false === $this->client->enqueue( $this->request ) ) { + // TODO: Think through error handling + return false; + } + } + + $this->after_chunk(); + + while ( $this->client->await_next_event() ) { + $request = $this->client->get_request(); + if ( ! 
$request ) { + continue; + } + $response = $request->response; + if ( false === $response ) { + continue; + } + if ( $request->redirected_to ) { + continue; + } + + switch ( $this->client->get_event() ) { + case \WordPress\AsyncHttp\Client::EVENT_GOT_HEADERS: + if(null !== $this->remote_file_length) { + continue 2; + } + $content_length = $response->get_header( 'Content-Length' ); + if ( false !== $content_length ) { + $this->remote_file_length = (int) $content_length; + } + break; + case \WordPress\AsyncHttp\Client::EVENT_BODY_CHUNK_AVAILABLE: + $chunk = $this->client->get_response_body_chunk(); + if ( ! is_string( $chunk ) ) { + // TODO: Think through error handling + return false; + } + $this->current_chunk = $chunk; + + /** + * Naive seek() implementation – redownload the file from the start + * and ignore bytes until we reach the desired offset. + * + * @TODO: Use the range requests instead when the server supports them. + */ + if ( $this->skip_bytes > 0 ) { + if ( $this->skip_bytes < strlen( $chunk ) ) { + $this->current_chunk = substr( $chunk, $this->skip_bytes ); + $this->bytes_already_read += $this->skip_bytes; + $this->skip_bytes = 0; + } else { + $this->skip_bytes -= strlen( $chunk ); + continue 2; + } + } + return true; + case \WordPress\AsyncHttp\Client::EVENT_FAILED: + // TODO: Think through error handling. Errors are expected when working with + // the network. Should we auto retry? Make it easy for the caller to retry? + // Something else? 
+ $this->last_error = $this->client->get_request()->error; + return false; + case \WordPress\AsyncHttp\Client::EVENT_FINISHED: + $this->is_finished = true; + return false; + } + } + } + + public function length(): ?int { + if ( null !== $this->remote_file_length ) { + return $this->remote_file_length; + } + + $request = new \WordPress\AsyncHttp\Request( + $this->url, + array( 'method' => 'HEAD' ) + ); + if ( false === $this->client->enqueue( $request ) ) { + // TODO: Think through error handling + return false; + } + while ( $this->client->await_next_event() ) { + switch ( $this->client->get_event() ) { + case \WordPress\AsyncHttp\Client::EVENT_GOT_HEADERS: + $request = $this->client->get_request(); + if ( ! $request ) { + return false; + } + if($request->redirected_to) { + continue 2; + } + $response = $request->response; + if ( false === $response ) { + return false; + } + $content_length = $response->get_header( 'Content-Length' ); + if ( false === $content_length ) { + return false; + } + $this->remote_file_length = (int) $content_length; + break; + } + } + if(null === $this->remote_file_length) { + return false; + } + return $this->remote_file_length; + } + + private function after_chunk() { + if ( $this->current_chunk ) { + $this->bytes_already_read += strlen( $this->current_chunk ); + } + $this->current_chunk = null; + } + + public function get_last_error(): ?string { + return $this->last_error; + } + + public function get_bytes(): ?string { + return $this->current_chunk; + } + + public function is_finished(): bool { + return $this->is_finished; + } +} diff --git a/src/WordPress/Filesystem/WP_Abstract_Filesystem.php b/src/WordPress/Filesystem/WP_Abstract_Filesystem.php new file mode 100644 index 00000000..81d1992d --- /dev/null +++ b/src/WordPress/Filesystem/WP_Abstract_Filesystem.php @@ -0,0 +1,101 @@ + The contents of the directory. + */ + abstract public function ls($parent = '/'); + + /** + * Check if a path is a directory. 
+ * + * @param string $path The path to check. + * @return bool True if the path is a directory, false otherwise. + */ + abstract public function is_dir($path); + + /** + * Check if a path is a file. + * + * @param string $path The path to check. + * @return bool True if the path is a file, false otherwise. + */ + abstract public function is_file($path); + + /** + * Start streaming a file. + * + * @example + * + * $fs->start_streaming_file($path); + * while($fs->next_file_chunk()) { + * $chunk = $fs->get_file_chunk(); + * // process $chunk + * } + * $fs->close_file_reader(); + * + * @param string $path The path to the file. + */ + abstract public function start_streaming_file($path); + + /** + * Get the next chunk of a file. + * + * @return string|false The next chunk of the file or false if the end of the file is reached. + */ + abstract public function next_file_chunk(); + + /** + * Get the current chunk of a file. + * + * @return string|false The current chunk of the file or false if no chunk is available. + */ + abstract public function get_file_chunk(); + + /** + * Get the error message of the filesystem. + * + * @return string|false The error message or false if no error occurred. + */ + abstract public function get_error_message(); + + /** + * Close the file reader. + */ + abstract public function close_file_reader(); + + /** + * Buffers the entire contents of a file into a string + * and returns it. + * + * @param string $path The path to the file. + * @return string|false The contents of the file or false if the file does not exist. 
+ */ + public function read_file($path) { + $this->start_streaming_file($path); + $body = ''; + while($this->next_file_chunk()) { + $chunk = $this->get_file_chunk(); + if($chunk === false) { + return false; + } + $body .= $chunk; + } + $this->close_file_reader(); + return $body; + } + +} diff --git a/src/WordPress/Filesystem/WP_File_Visitor_Event.php b/src/WordPress/Filesystem/WP_File_Visitor_Event.php new file mode 100644 index 00000000..176d5795 --- /dev/null +++ b/src/WordPress/Filesystem/WP_File_Visitor_Event.php @@ -0,0 +1,32 @@ + + */ + public $files; + + const EVENT_ENTER = 'entering'; + const EVENT_EXIT = 'exiting'; + + public function __construct( $type, $dir, $files = array() ) { + $this->type = $type; + $this->dir = $dir; + $this->files = $files; + } + + public function is_entering() { + return $this->type === self::EVENT_ENTER; + } + + public function is_exiting() { + return $this->type === self::EVENT_EXIT; + } +} diff --git a/src/WordPress/Filesystem/WP_Filesystem.php b/src/WordPress/Filesystem/WP_Filesystem.php new file mode 100644 index 00000000..6373c164 --- /dev/null +++ b/src/WordPress/Filesystem/WP_Filesystem.php @@ -0,0 +1,74 @@ +last_file_reader) { + $this->last_file_reader->close(); + } + $this->last_file_reader = \WordPress\ByteReader\WP_File_Reader::create($path); + return $this->last_file_reader->next_bytes(); + } + + public function next_file_chunk() { + return $this->last_file_reader->next_bytes(); + } + + public function get_file_chunk() { + return $this->last_file_reader->get_bytes(); + } + + public function get_error_message() { + return $this->last_file_reader->get_last_error(); + } + + public function close_file_reader() { + if($this->last_file_reader) { + $this->last_file_reader->close(); + $this->last_file_reader = null; + } + } + +} diff --git a/src/WordPress/Filesystem/WP_Filesystem_Visitor.php b/src/WordPress/Filesystem/WP_Filesystem_Visitor.php new file mode 100644 index 00000000..e6403bed --- /dev/null +++ 
b/src/WordPress/Filesystem/WP_Filesystem_Visitor.php @@ -0,0 +1,91 @@ +filesystem = $filesystem; + $this->root_dir = $dir; + $this->iterator_stack[] = $this->create_iterator( $dir ); + } + + public function get_current_depth() { + return $this->depth; + } + + public function get_root_dir() { + return $this->root_dir; + } + + public function next() { + while ( ! empty( $this->iterator_stack ) ) { + $this->current_iterator = end( $this->iterator_stack ); + + if ( ! $this->current_iterator->valid() ) { + array_pop( $this->iterator_stack ); + continue; + } + $current = $this->current_iterator->current(); + $this->current_iterator->next(); + + if ( ! ( $current instanceof WP_File_Visitor_Event ) ) { + // It's a directory path, push a new iterator onto the stack + $this->iterator_stack[] = $this->create_iterator( $current ); + continue; + } + + if ( $current->is_entering() ) { + ++$this->depth; + } + $this->current_event = $current; + if ( $current->is_exiting() ) { + --$this->depth; + } + return true; + } + + return false; + } + + public function get_event(): ?WP_File_Visitor_Event { + return $this->current_event; + } + + private function create_iterator( $dir ) { + $this->directories = array(); + $this->files = array(); + + $filesystem = $this->filesystem; + $children = $filesystem->ls($dir); + if ( $children === false ) { + return new \ArrayIterator( array() ); + } + + foreach($children as $child) { + if ( $filesystem->is_dir( $dir . '/' . $child ) ) { + $this->directories[] = $child; + continue; + } + $this->files[] = $child; + } + + $events = array(); + $events[] = new WP_File_Visitor_Event( WP_File_Visitor_Event::EVENT_ENTER, $dir, $this->files ); + $prefix = $dir === '/' ? '' : $dir; + foreach ( $this->directories as $directory ) { + $events[] = $prefix . '/' . 
$directory; // Placeholder for recursion + } + $events[] = new WP_File_Visitor_Event( WP_File_Visitor_Event::EVENT_EXIT, $dir ); + return new \ArrayIterator( $events ); + } + +} diff --git a/src/WordPress/Filesystem/WP_Zip_Filesystem.php b/src/WordPress/Filesystem/WP_Zip_Filesystem.php new file mode 100644 index 00000000..d7caa708 --- /dev/null +++ b/src/WordPress/Filesystem/WP_Zip_Filesystem.php @@ -0,0 +1,270 @@ +zip = new ZipStreamReader($byte_reader); + $this->byte_reader = $byte_reader; + } + + public function ls($parent = '/') { + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + + $descendants = $this->central_directory; + + // Only keep the descendants of the given parent. + $parent = trim($parent, '/') ; + $prefix = $parent ? $parent . '/' : ''; + if(strlen($prefix) > 1) { + $filtered_descendants = []; + foreach($descendants as $entry) { + $path = $entry['path']; + if(strpos($path, $prefix) !== 0) { + continue; + } + $filtered_descendants[] = $entry; + } + $descendants = $filtered_descendants; + } + + // Only keep the direct children of the parent. + $children = []; + foreach($descendants as $entry) { + $suffix = substr($entry['path'], strlen($prefix)); + if(str_contains($suffix, '/')) { + continue; + } + // No need to include the directory itself. 
+ if(strlen($suffix) === 0) { + continue; + } + $children[] = $suffix; + } + return $children; + } + + public function is_dir($path) { + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + $path = trim($path, '/'); + return isset($this->central_directory[$path]) && self::TYPE_DIR === $this->central_directory[$path]['type']; + } + + public function is_file($path) { + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + $path = trim($path, '/'); + return isset($this->central_directory[$path]) && self::TYPE_FILE === $this->central_directory[$path]['type']; + } + + public function start_streaming_file($path) { + $this->opened_file_finished = false; + $this->file_chunk = null; + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + $path = trim($path, '/'); + if(!isset($this->central_directory[$path])) { + _doing_it_wrong( + __METHOD__, + sprintf('File %s not found', $path), + '1.0.0' + ); + return false; + } + if(self::TYPE_FILE !== $this->central_directory[$path]['type']) { + _doing_it_wrong( + __METHOD__, + sprintf('Path %s is not a file', $path), + '1.0.0' + ); + return false; + } + return $this->zip->seek_to_record($this->central_directory[$path]['firstByteAt']); + } + + public function next_file_chunk() { + if ( $this->state === self::STATE_ERROR ) { + return false; + } + if ( $this->opened_file_finished ) { + $this->file_chunk = null; + return false; + } + if ( false === $this->zip->next() ) { + return false; + } + if ( NewZipStreamReader::STATE_FILE_ENTRY !== $this->zip->get_state() ) { + return false; + } + $this->file_chunk = $this->zip->get_file_body_chunk(); + if($this->zip->count_remaining_file_body_bytes() === 0) { + $this->opened_file_finished = true; + } + return true; + } + + public function get_file_chunk(): string { 
+ return $this->file_chunk ?? ''; + } + + public function get_error_message() { + return $this->error_message; + } + + private function load_central_directory() { + if($this->state === self::STATE_ERROR) { + return false; + } + if(null !== $this->central_directory) { + return true; + } + + if($this->central_directory_size() >= self::MAX_CENTRAL_DIRECTORY_SIZE) { + return false; + } + + // Read the central directory into memory. + if(false === $this->seek_to_central_directory_index()) { + return false; + } + + $central_directory = array(); + while($this->zip->next()) { + if(NewZipStreamReader::STATE_CENTRAL_DIRECTORY_ENTRY !== $this->zip->get_state()) { + continue; + } + $central_directory[] = $this->zip->get_header(); + } + + // Transform the central directory into a tree structure with + // directories and files. + foreach($central_directory as $entry) { + /** + * Directory are sometimes indicated by a path + * ending with a right trailing slash. Let's remove it + * to avoid an empty entry at the end of $path_segments. + */ + $path_segments = explode('/', $entry['path']); + + for($i=0; $i < count($path_segments)-1; $i++) { + $path_so_far = implode('/', array_slice($path_segments, 0, $i + 1)); + if(isset($this->central_directory[$path_so_far])) { + if(self::TYPE_DIR !== $this->central_directory[$path_so_far]['type']) { + $this->set_error('Path stored both as a file and a directory: ' . $path_so_far); + return false; + } + } + $this->central_directory[$path_so_far] = array( + 'path' => $path_so_far, + 'type' => self::TYPE_DIR, + ); + } + /** + * Only create a file entry if it's not a directory. 
+ */ + if(!str_ends_with($entry['path'], '/')) { + $this->central_directory[$entry['path']] = $entry; + $this->central_directory[$entry['path']]['type'] = self::TYPE_FILE; + } + } + + return true; + } + + private function set_error($message) { + $this->state = self::STATE_ERROR; + $this->error_message = $message; + } + + private function central_directory_size() { + if(false === $this->collect_central_directory_end_header()) { + return false; + } + + return $this->central_directory_end_header['centralDirectorySize']; + } + + private function seek_to_central_directory_index() + { + if(false === $this->collect_central_directory_end_header()) { + return false; + } + + return $this->zip->seek_to_record($this->central_directory_end_header['centralDirectoryOffset']); + } + + private function collect_central_directory_end_header() { + if( null !== $this->central_directory_end_header ) { + return true; + } + + $length = $this->byte_reader->length(); + if(true !== $this->zip->seek_to_record($length - 22)) { + return false; + } + if(true !== $this->zip->next()) { + return false; + } + if($this->zip->get_state() !== NewZipStreamReader::STATE_END_CENTRAL_DIRECTORY_ENTRY) { + return false; + } + + $this->central_directory_end_header = $this->zip->get_header(); + return true; + } + + public function close_file_reader() { + return true; + } + +} diff --git a/src/WordPress/Zip/NewZipStreamReader.php b/src/WordPress/Zip/NewZipStreamReader.php deleted file mode 100644 index b49552b2..00000000 --- a/src/WordPress/Zip/NewZipStreamReader.php +++ /dev/null @@ -1,357 +0,0 @@ - $this->file_path, - 'zip_file_bytes_parsed_so_far' => $this->zip_file_bytes_parsed_so_far, - 'file_entry_body_bytes_parsed_so_far' => $this->file_entry_body_bytes_parsed_so_far, - 'state' => $this->state, - 'header' => $this->header, - 'file_body_chunk' => $this->file_body_chunk, - 'paused_incomplete_input' => $this->paused_incomplete_input, - ]; - } - - public function resume($paused) { - $this->file_path = 
$paused['file_path']; - $this->zip_file_bytes_parsed_so_far = 0; - $this->state = $paused['state']; - $this->header = $paused['header']; - $this->file_body_chunk = $paused['file_body_chunk']; - $this->paused_incomplete_input = $paused['paused_incomplete_input']; - - $this->fp = fopen($this->file_path, 'rb'); - if($paused['file_entry_body_bytes_parsed_so_far'] > 0) { - $this->inflate_handle = inflate_init(ZLIB_ENCODING_RAW); - $file_starts_at = $paused['zip_file_bytes_parsed_so_far'] - $paused['file_entry_body_bytes_parsed_so_far']; - $this->zip_file_bytes_parsed_so_far = $file_starts_at; - fseek($this->fp, $file_starts_at); - while(true) { - $missing_bytes = $paused['file_entry_body_bytes_parsed_so_far'] - $this->file_entry_body_bytes_parsed_so_far; - $missing_bytes = max(0, min(4096, $missing_bytes)); - if($missing_bytes === 0) { - break; - } - $this->read_file_entry_body_chunk($missing_bytes); - } - } else { - $this->zip_file_bytes_parsed_so_far = $paused['zip_file_bytes_parsed_so_far']; - fseek($this->fp, $this->zip_file_bytes_parsed_so_far); - } - } - - public function __construct($file_path) { - $this->file_path = $file_path; - } - - public function is_paused_at_incomplete_input(): bool { - return $this->paused_incomplete_input; - } - - public function is_finished(): bool - { - return self::STATE_COMPLETE === $this->state || self::STATE_ERROR === $this->state; - } - - public function get_state() - { - return $this->state; - } - - public function get_header() - { - return $this->header; - } - - public function get_file_path() - { - if(!$this->header) { - return null; - } - - return $this->header['path']; - } - - public function get_file_body_chunk() - { - return $this->file_body_chunk; - } - - public function get_last_error(): ?string - { - return $this->error_message; - } - - public function next() - { - do { - if(self::STATE_SCAN === $this->state) { - if(false === $this->scan()) { - return false; - } - } - - switch ($this->state) { - case self::STATE_ERROR: - 
case self::STATE_COMPLETE: - return false; - - case self::STATE_FILE_ENTRY: - if (false === $this->read_file_entry()) { - return false; - } - break; - - case self::STATE_CENTRAL_DIRECTORY_ENTRY: - if (false === $this->read_central_directory_entry()) { - return false; - } - break; - - case self::STATE_END_CENTRAL_DIRECTORY_ENTRY: - if (false === $this->read_end_central_directory_entry()) { - return false; - } - break; - - default: - return false; - } - } while (self::STATE_SCAN === $this->state); - - return true; - } - - private function read_central_directory_entry() - { - if ($this->header && !empty($this->header['path'])) { - $this->header = null; - $this->state = self::STATE_SCAN; - return; - } - - if (!$this->header) { - $data = $this->consume_bytes(42); - if ($data === false) { - $this->paused_incomplete_input = true; - return false; - } - $this->header = unpack( - 'vversionCreated/vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength/vfileCommentLength/vdiskNumber/vinternalAttributes/VexternalAttributes/VfirstByteAt', - $data - ); - } - - if($this->header) { - $n = $this->header['pathLength'] + $this->header['extraLength'] + $this->header['fileCommentLength']; - $this->header['path'] = $this->consume_bytes($this->header['pathLength']); - $this->header['extra'] = $this->consume_bytes($this->header['extraLength']); - $this->header['fileComment'] = $this->consume_bytes($this->header['fileCommentLength']); - if(!$this->header['path']) { - $this->set_error('Empty path in central directory entry'); - } - } - } - - private function read_end_central_directory_entry() - { - if ($this->header && ( !empty($this->header['comment']) || 0 === $this->header['commentLength'] )) { - $this->header = null; - $this->state = self::STATE_SCAN; - return; - } - - if(!$this->header) { - $data = $this->consume_bytes(18); - if ($data === false) { - $this->paused_incomplete_input = true; - 
return false; - } - $this->header = unpack( - 'vdiskNumber/vcentralDirectoryStartDisk/vnumberCentralDirectoryRecordsOnThisDisk/vnumberCentralDirectoryRecords/VcentralDirectorySize/VcentralDirectoryOffset/vcommentLength', - $data - ); - } - - if($this->header && empty($this->header['comment']) && $this->header['commentLength'] > 0) { - $comment = $this->consume_bytes($this->header['commentLength']); - if(false === $comment) { - $this->paused_incomplete_input = true; - return false; - } - $this->header['comment'] = $comment; - } - } - - private function scan() { - $signature = $this->consume_bytes(4); - if ($signature === false || 0 === strlen($signature)) { - $this->paused_incomplete_input = true; - return false; - } - $signature = unpack('V', $signature)[1]; - switch($signature) { - case self::SIGNATURE_FILE: - $this->state = self::STATE_FILE_ENTRY; - break; - case self::SIGNATURE_CENTRAL_DIRECTORY: - $this->state = self::STATE_CENTRAL_DIRECTORY_ENTRY; - break; - case self::SIGNATURE_CENTRAL_DIRECTORY_END: - $this->state = self::STATE_END_CENTRAL_DIRECTORY_ENTRY; - break; - default: - $this->set_error('Invalid signature ' . $signature); - return false; - } - } - - /** - * Reads a file entry from a zip file. - * - * The file entry is structured as follows: - * - * ``` - * Offset Bytes Description - * 0 4 Local file header signature = 0x04034b50 (PK♥♦ or "PK\3\4") - * 4 2 Version needed to extract (minimum) - * 6 2 General purpose bit flag - * 8 2 Compression method; e.g. 
none = 0, DEFLATE = 8 (or "\0x08\0x00") - * 10 2 File last modification time - * 12 2 File last modification date - * 14 4 CRC-32 of uncompressed data - * 18 4 Compressed size (or 0xffffffff for ZIP64) - * 22 4 Uncompressed size (or 0xffffffff for ZIP64) - * 26 2 File name length (n) - * 28 2 Extra field length (m) - * 30 n File name - * 30+n m Extra field - * ``` - * - * @param resource $stream - */ - private function read_file_entry() - { - if(false === $this->read_file_entry_header()) { - return false; - } - if(false === $this->read_file_entry_body_chunk()) { - return false; - } - } - - private function read_file_entry_header() { - if (null === $this->header) { - $data = $this->consume_bytes(26); - if ($data === false) { - $this->paused_incomplete_input = true; - return false; - } - $this->header = unpack( - 'vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength', - $data - ); - $this->file_entry_body_bytes_parsed_so_far = 0; - } - - if($this->header && empty($this->header['path'])) { - $this->header['path'] = $this->consume_bytes($this->header['pathLength']); - $this->header['extra'] = $this->consume_bytes($this->header['extraLength']); - if($this->header['compressionMethod'] === self::COMPRESSION_DEFLATE) { - $this->inflate_handle = inflate_init(ZLIB_ENCODING_RAW); - } - } - } - - private function read_file_entry_body_chunk($max_bytes_to_read=4096) { - $this->file_body_chunk = null; - - $file_body_bytes_left = $this->header['compressedSize'] - $this->file_entry_body_bytes_parsed_so_far; - if($file_body_bytes_left === 0) { - $this->header = null; - $this->inflate_handle = null; - $this->file_entry_body_bytes_parsed_so_far = 0; - $this->state = self::STATE_SCAN; - return; - } - - $chunk_size = min($max_bytes_to_read, $file_body_bytes_left); - $compressed_bytes = $this->consume_bytes($chunk_size); - $this->file_entry_body_bytes_parsed_so_far += 
strlen($compressed_bytes); - - if ($this->header['compressionMethod'] === self::COMPRESSION_DEFLATE) { - $uncompressed_bytes = inflate_add($this->inflate_handle, $compressed_bytes, ZLIB_PARTIAL_FLUSH); - if ( $uncompressed_bytes === false || inflate_get_status( $this->inflate_handle ) === false ) { - $this->set_error('Failed to inflate'); - return false; - } - } else { - $uncompressed_bytes = $compressed_bytes; - } - - $this->file_body_chunk = $uncompressed_bytes; - } - - private function set_error($message) { - $this->state = self::STATE_ERROR; - $this->error_message = $message; - $this->paused_incomplete_input = false; - } - - private function consume_bytes($n) { - if(0 === $n) { - return ''; - } - if(null === $this->fp) { - $this->fp = fopen($this->file_path, 'rb'); - } - - $this->zip_file_bytes_parsed_so_far += $n; - $bytes_read = fread($this->fp, $n); - if(false === $bytes_read || '' === $bytes_read) { - fclose($this->fp); - $this->state = self::STATE_COMPLETE; - return false; - } - return $bytes_read; - } - -} diff --git a/src/WordPress/Zip/WP_Zip_Filesystem.php b/src/WordPress/Zip/WP_Zip_Filesystem.php new file mode 100644 index 00000000..d55f9030 --- /dev/null +++ b/src/WordPress/Zip/WP_Zip_Filesystem.php @@ -0,0 +1,271 @@ +zip = new ZipStreamReader($byte_reader); + $this->byte_reader = $byte_reader; + } + + public function ls($parent = '/') { + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + + $descendants = $this->central_directory; + + // Only keep the descendants of the given parent. + $parent = trim($parent, '/') ; + $prefix = $parent ? $parent . '/' : ''; + if(strlen($prefix) > 1) { + $filtered_descendants = []; + foreach($descendants as $entry) { + $path = $entry['path']; + if(strpos($path, $prefix) !== 0) { + continue; + } + $filtered_descendants[] = $entry; + } + $descendants = $filtered_descendants; + } + + // Only keep the direct children of the parent. 
+		$children = [];
+		foreach($descendants as $entry) {
+			$suffix = substr($entry['path'], strlen($prefix));
+			if(str_contains($suffix, '/')) {
+				continue;
+			}
+			// No need to include the directory itself.
+			if(strlen($suffix) === 0) {
+				continue;
+			}
+			$children[] = $suffix;
+		}
+		return $children;
+	}
+
+	/**
+	 * Checks whether the given path is a directory in the ZIP archive.
+	 *
+	 * @param string $path Path inside the archive. Leading and trailing slashes are ignored.
+	 * @return bool True when the central directory index lists $path as a directory.
+	 *              False otherwise, including when this object is in the error state
+	 *              or the central directory could not be loaded.
+	 */
+	public function is_dir($path) {
+		if($this->state === self::STATE_ERROR) {
+			return false;
+		}
+		if(false === $this->load_central_directory()) {
+			return false;
+		}
+		$path = trim($path, '/');
+		return isset($this->central_directory[$path]) && self::TYPE_DIR === $this->central_directory[$path]['type'];
+	}
+
+	/**
+	 * Checks whether the given path is a file in the ZIP archive.
+	 *
+	 * @param string $path Path inside the archive. Leading and trailing slashes are ignored.
+	 * @return bool True when the central directory index lists $path as a file.
+	 *              False otherwise, including when this object is in the error state
+	 *              or the central directory could not be loaded.
+	 */
+	public function is_file($path) {
+		if($this->state === self::STATE_ERROR) {
+			return false;
+		}
+		if(false === $this->load_central_directory()) {
+			return false;
+		}
+		$path = trim($path, '/');
+		return isset($this->central_directory[$path]) && self::TYPE_FILE === $this->central_directory[$path]['type'];
+	}
+
+	/**
+	 * Positions the ZIP reader at the record of the given file so that
+	 * next_file_chunk() can stream its contents.
+	 *
+	 * @param string $path Path of a file inside the archive. Leading and trailing slashes are ignored.
+	 * @return bool True when the reader was moved to the file's record. False when
+	 *              the path is missing or is not a file (reported via _doing_it_wrong()),
+	 *              or when the central directory or the seek failed.
+	 */
+	public function start_streaming_file($path) {
+		$this->opened_file_finished = false;
+		$this->file_chunk = null;
+		if($this->state === self::STATE_ERROR) {
+			return false;
+		}
+		if(false === $this->load_central_directory()) {
+			return false;
+		}
+		$path = trim($path, '/');
+		if(!isset($this->central_directory[$path])) {
+			_doing_it_wrong(
+				__METHOD__,
+				sprintf('File %s not found', $path),
+				'1.0.0'
+			);
+			return false;
+		}
+		$entry = $this->central_directory[$path];
+		if(self::TYPE_FILE !== $entry['type']) {
+			_doing_it_wrong(
+				__METHOD__,
+				sprintf('Path %s is not a file', $path),
+				'1.0.0'
+			);
+			return false;
+		}
+		if(false === $this->zip->seek_to_record($entry['firstByteAt'])) {
+			return false;
+		}
+		// An entry without any body bytes has nothing to stream. Without this
+		// guard, ZipStreamReader::next() would skip the empty body and move on
+		// to the following record, so next_file_chunk() would hand out bytes
+		// that belong to a different file.
+		if(isset($entry['compressedSize']) && 0 === $entry['compressedSize']) {
+			$this->opened_file_finished = true;
+		}
+		return true;
+	}
+
+	/**
+	 * Reads the next chunk of the file selected with start_streaming_file().
+	 *
+	 * @return bool True when a chunk was read and is available through
+	 *              get_file_chunk(). False when the file has been fully read,
+	 *              this object is in the error state, or the ZIP reader could
+	 *              not produce a file chunk.
+	 */
+	public function next_file_chunk() {
+		// Never leave the previous chunk visible after a failed read.
+		$this->file_chunk = null;
+		if ( $this->state === self::STATE_ERROR ) {
+			return false;
+		}
+		if ( $this->opened_file_finished ) {
+			return false;
+		}
+		if ( false === $this->zip->next() ) {
+			return false;
+		}
+		if ( ZipStreamReader::STATE_FILE_ENTRY !== $this->zip->get_state() ) {
+			return false;
+		}
+		$this->file_chunk = $this->zip->get_file_body_chunk();
+		if($this->zip->count_remaining_file_body_bytes() === 0) {
+			$this->opened_file_finished = true;
+		}
+		return true;
+	}
+
+	/**
+	 * Returns the chunk produced by the last successful next_file_chunk() call.
+	 *
+	 * @return string The chunk bytes, or an empty string when there is none.
+	 */
+	public function get_file_chunk(): string {
+		return $this->file_chunk ?? '';
+	}
+
+	/**
+	 * Returns the message passed to the most recent set_error() call.
+	 */
+	public function get_error_message() {
+		return $this->error_message;
+	}
+
+	/**
+	 * Reads the central directory once and indexes it by path.
+	 *
+	 * Every path in the resulting index maps to an array with at least the
+	 * `path` and `type` keys. File entries additionally carry all the fields
+	 * of their central directory header.
+	 *
+	 * @return bool True when the index is available, false on failure.
+	 */
+	private function load_central_directory() {
+		if($this->state === self::STATE_ERROR) {
+			return false;
+		}
+		if(null !== $this->central_directory) {
+			return true;
+		}
+
+		$size = $this->central_directory_size();
+		if(false === $size) {
+			return false;
+		}
+		if($size >= self::MAX_CENTRAL_DIRECTORY_SIZE) {
+			$this->set_error('Central directory is too large to be loaded into memory');
+			return false;
+		}
+
+		// Read the central directory into memory.
+		if(false === $this->seek_to_central_directory_index()) {
+			return false;
+		}
+
+		$entries = array();
+		while($this->zip->next()) {
+			if(ZipStreamReader::STATE_CENTRAL_DIRECTORY_ENTRY !== $this->zip->get_state()) {
+				continue;
+			}
+			$entries[] = $this->zip->get_header();
+		}
+		// A parse error ends the loop above just like the end of input does.
+		// Surface it instead of exposing a silently truncated file listing.
+		if(ZipStreamReader::STATE_ERROR === $this->zip->get_state()) {
+			$this->set_error('Failed to read the central directory: ' . $this->zip->get_last_error());
+			return false;
+		}
+
+		// Transform the central directory into a tree structure with
+		// directories and files. The index is built in a local variable and
+		// published only at the end, so that an archive without any entries
+		// still counts as loaded and a failure never leaves a partial index.
+		$index = array();
+		foreach($entries as $entry) {
+			/**
+			 * Directories are sometimes indicated by a path ending with
+			 * a trailing slash. That yields an empty last segment, which
+			 * the loop below never visits because it stops one segment
+			 * before the end.
+			 */
+			$path_segments = explode('/', $entry['path']);
+
+			for($i=0; $i < count($path_segments)-1; $i++) {
+				$path_so_far = implode('/', array_slice($path_segments, 0, $i + 1));
+				if(isset($index[$path_so_far])) {
+					if(self::TYPE_DIR !== $index[$path_so_far]['type']) {
+						$this->set_error('Path stored both as a file and a directory: ' . $path_so_far);
+						return false;
+					}
+					continue;
+				}
+				$index[$path_so_far] = array(
+					'path' => $path_so_far,
+					'type' => self::TYPE_DIR,
+				);
+			}
+			/**
+			 * Only create a file entry if it's not a directory.
+			 */
+			if(!str_ends_with($entry['path'], '/')) {
+				// The same conflict as above, just seen in the opposite order.
+				if(isset($index[$entry['path']]) && self::TYPE_DIR === $index[$entry['path']]['type']) {
+					$this->set_error('Path stored both as a file and a directory: ' . $entry['path']);
+					return false;
+				}
+				$index[$entry['path']] = $entry;
+				$index[$entry['path']]['type'] = self::TYPE_FILE;
+			}
+		}
+
+		$this->central_directory = $index;
+		return true;
+	}
+
+	/**
+	 * Puts this object into the error state and stores the message
+	 * for get_error_message().
+	 *
+	 * @param string $message Description of the failure.
+	 */
+	private function set_error($message) {
+		$this->state = self::STATE_ERROR;
+		$this->error_message = $message;
+	}
+
+	/**
+	 * Returns the `centralDirectorySize` field of the end of central
+	 * directory record, or false when that record could not be read.
+	 */
+	private function central_directory_size() {
+		if(false === $this->collect_central_directory_end_header()) {
+			return false;
+		}
+
+		return $this->central_directory_end_header['centralDirectorySize'];
+	}
+
+	/**
+	 * Moves the ZIP reader to the `centralDirectoryOffset` stored in the
+	 * end of central directory record.
+	 *
+	 * @return bool Whether the seek succeeded.
+	 */
+	private function seek_to_central_directory_index()
+	{
+		if(false === $this->collect_central_directory_end_header()) {
+			return false;
+		}
+
+		return $this->zip->seek_to_record($this->central_directory_end_header['centralDirectoryOffset']);
+	}
+
+	/**
+	 * Reads and caches the end of central directory record.
+	 *
+	 * The record is looked up exactly 22 bytes before the end of the input,
+	 * so an archive that ends with a comment is not supported here: the
+	 * record would start earlier and this method returns false.
+	 *
+	 * @return bool True when the record is cached, false otherwise.
+	 */
+	private function collect_central_directory_end_header() {
+		if( null !== $this->central_directory_end_header ) {
+			return true;
+		}
+
+		$length = $this->byte_reader->length();
+		// An unknown length or an input shorter than the record itself
+		// would produce a meaningless or negative seek offset.
+		if(!is_int($length) || $length < 22) {
+			return false;
+		}
+		if(true !== $this->zip->seek_to_record($length - 22)) {
+			return false;
+		}
+		if(true !== $this->zip->next()) {
+			return false;
+		}
+		if($this->zip->get_state() !== ZipStreamReader::STATE_END_CENTRAL_DIRECTORY_ENTRY) {
+			return false;
+		}
+
+		$this->central_directory_end_header = $this->zip->get_header();
+		return true;
+	}
+
+	/**
+	 * Currently does nothing and always reports success.
+	 */
+	public function close_file_reader() {
+		return true;
+	}
+
+}
diff --git a/src/WordPress/Zip/ZipStreamReader.php b/src/WordPress/Zip/ZipStreamReader.php
index daf087b5..a6b8437d 100644
--- a/src/WordPress/Zip/ZipStreamReader.php
+++ b/src/WordPress/Zip/ZipStreamReader.php
@@ -2,6 +2,11 @@
 
 namespace WordPress\Zip;
 
+use WordPress\ByteReader\WP_Byte_Reader;
+
+/**
+ * Parses ZIP records one at a time from the bytes supplied by a WP_Byte_Reader.
+ */
 class ZipStreamReader {
 
 	const SIGNATURE_FILE = 0x04034b50;
@@ -9,28 +14,217 @@ class ZipStreamReader {
 	const SIGNATURE_CENTRAL_DIRECTORY_END = 0x06054b50;
 	const COMPRESSION_DEFLATE = 8;
 
-	/**
-	 * Reads the next zip entry from a stream of zip file bytes.
-	 *
-	 * @param resource $fp A stream of zip file bytes.
-	 */
-	public static function readEntry( $fp ) {
-		$signature = static::read_bytes( $fp, 4 );
-		if ( $signature === false ) {
-			return null;
+	// One of the STATE_* constants below.
+	private $state = ZipStreamReader::STATE_SCAN;
+	// Fields of the record currently being parsed, null between records.
+	private $header = null;
+	// Bytes produced by the latest file body read (inflated when needed).
+	private $file_body_chunk = null;
+	// True when the latest next() call stopped because input ran out.
+	private $paused_incomplete_input = false;
+	private $error_message;
+	private $inflate_handle;
+	// Offset recorded at the start of the latest signature scan.
+	private $last_record_at = null;
+	private $byte_reader;
+	// Bytes pulled from the byte reader but not yet consumed by the parser.
+	private $byte_buffer = '';
+	private $file_bytes_consumed_so_far = 0;
+	private $file_entry_body_bytes_parsed_so_far = 0;
+
+	const STATE_SCAN = 'scan';
+	const STATE_FILE_ENTRY = 'file-entry';
+	const STATE_CENTRAL_DIRECTORY_ENTRY = 'central-directory-entry';
+	const STATE_CENTRAL_DIRECTORY_ENTRY_EXTRA = 'central-directory-entry-extra';
+	const STATE_END_CENTRAL_DIRECTORY_ENTRY = 'end-central-directory-entry';
+	const STATE_END_CENTRAL_DIRECTORY_ENTRY_EXTRA = 'end-central-directory-entry-extra';
+	const STATE_COMPLETE = 'complete';
+	const STATE_ERROR = 'error';
+
+	/**
+	 * @param WP_Byte_Reader $byte_reader Source of the ZIP file bytes.
+	 */
+	public function __construct(WP_Byte_Reader $byte_reader) {
+		$this->byte_reader = $byte_reader;
+	}
+
+	/**
+	 * Tells whether the latest next() call stopped because more input is needed.
+	 */
+	public function is_paused_at_incomplete_input(): bool {
+		return $this->paused_incomplete_input;
+	}
+
+	/**
+	 * Tells whether the reader reached the end of input or failed.
+	 */
+	public function is_finished(): bool
+	{
+		return self::STATE_COMPLETE === $this->state || self::STATE_ERROR === $this->state;
+	}
+
+	/**
+	 * Returns the current state, one of the STATE_* constants.
+	 */
+	public function get_state()
+	{
+		return $this->state;
+	}
+
+	/**
+	 * Returns the fields of the record being parsed, or null between records.
+	 */
+	public function get_header()
+	{
+		return $this->header;
+	}
+
+	/**
+	 * Returns the path of the current record, or null when there is no
+	 * current record or its path has not been read.
+	 */
+	public function get_file_path()
+	{
+		if(!$this->header) {
+			return null;
+		}
+
+		// Not every record has a path, e.g. the end of central directory record.
+		return $this->header['path'] ?? null;
+	}
+
+	/**
+	 * Returns the bytes produced by the latest file body read.
+	 */
+	public function get_file_body_chunk()
+	{
+		return $this->file_body_chunk;
+	}
+
+	/**
+	 * Returns how many compressed body bytes of the current file entry
+	 * have not been read yet. Returns 0 when there is no current record
+	 * or the current record carries no compressed size.
+	 */
+	public function count_remaining_file_body_bytes() {
+		if(!$this->header || !isset($this->header['compressedSize'])) {
+			return 0;
+		}
+		return $this->header['compressedSize'] - $this->file_entry_body_bytes_parsed_so_far;
+	}
+
+	/**
+	 * Returns the message of the latest error, or null when none occurred.
+	 */
+	public function get_last_error(): ?string
+	{
+		return $this->error_message;
+	}
+
+	/**
+	 * Advances the parser: to the next record, or to the next body chunk
+	 * of the current file entry.
+	 *
+	 * @return bool True when a record or a chunk is available through the
+	 *              getters. False when the input ended, more input is needed
+	 *              (see is_paused_at_incomplete_input()), or parsing failed
+	 *              (see get_last_error()).
+	 */
+	public function next()
+	{
+		// The flag describes the outcome of this call only. Without the reset
+		// it would stay true forever after the first pause.
+		$this->paused_incomplete_input = false;
+
+		do {
+			if(self::STATE_SCAN === $this->state) {
+				if(false === $this->scan()) {
+					return false;
+				}
+			}
+
+			switch ($this->state) {
+				case self::STATE_ERROR:
+				case self::STATE_COMPLETE:
+					return false;
+
+				case self::STATE_FILE_ENTRY:
+					if (false === $this->read_file_entry()) {
+						return false;
+					}
+					break;
+
+				case self::STATE_CENTRAL_DIRECTORY_ENTRY:
+					if (false === $this->read_central_directory_entry()) {
+						return false;
+					}
+					break;
+
+				case self::STATE_END_CENTRAL_DIRECTORY_ENTRY:
+					if (false === $this->read_end_central_directory_entry()) {
+						return false;
+					}
+					break;
+
+				default:
+					return false;
+			}
+			// A reader that went back to scanning has finished a record
+			// without producing anything new, so keep going.
+		} while (self::STATE_SCAN === $this->state);
+
+		return true;
+	}
+
+	/**
+	 * Moves the reader to a byte offset at which a record signature starts.
+	 *
+	 * @param int $record_offset Offset from the beginning of the input.
+	 * @return bool Whether the underlying byte reader accepted the offset.
+	 */
+	public function seek_to_record($record_offset) {
+		$this->after_record();
+		if( false === $this->byte_reader->seek($record_offset) ) {
+			return false;
+		}
+		$this->byte_buffer = '';
+		$this->file_bytes_consumed_so_far = $record_offset;
+		return true;
+	}
+
+	/**
+	 * Returns the offset recorded at the start of the latest signature scan.
+	 */
+	public function tell() {
+		return $this->last_record_at;
+	}
+
+	/**
+	 * Drops all per-record state and goes back to scanning for a signature.
+	 */
+	private function after_record() {
+		$this->state = self::STATE_SCAN;
+		$this->header = null;
+		// @TODO: Does the inflate_handle need an fclose() or so call?
+		$this->inflate_handle = null;
+		$this->file_body_chunk = null;
+		$this->file_entry_body_bytes_parsed_so_far = 0;
+	}
+
+	/**
+	 * Parses a central directory entry. The call that follows a fully
+	 * parsed entry only finishes the record and returns to scanning.
+	 *
+	 * @return false|null False when input ran out or the entry is invalid.
+	 */
+	private function read_central_directory_entry()
+	{
+		// isset() and not !empty(): a file may legitimately be named "0",
+		// which empty() would mistake for a path that was not read yet.
+		if ($this->header && isset($this->header['path'])) {
+			$this->after_record();
+			return;
 		}
-		$signature = unpack( 'V', $signature )[1];
-		if ( $signature === static::SIGNATURE_FILE ) {
-			return static::readFileEntry( $fp );
-		} elseif ( $signature === static::SIGNATURE_CENTRAL_DIRECTORY ) {
-			return static::readCentralDirectoryEntry( $fp, true );
-		} elseif ( $signature === static::SIGNATURE_CENTRAL_DIRECTORY_END ) {
-			return static::readEndCentralDirectoryEntry( $fp, true );
+
+		if (!$this->header) {
+			$data = $this->consume_bytes(42);
+			if ($data === false) {
+				$this->paused_incomplete_input = true;
+				return false;
+			}
+			$this->header = unpack(
+				'vversionCreated/vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength/vfileCommentLength/vdiskNumber/vinternalAttributes/VexternalAttributes/VfirstByteAt',
+				$data
+			);
 		}
 
-		return null;
+		if($this->header) {
+			// Take the three variable-length fields in a single read. Either all
+			// of them are available or none is consumed, which keeps the entry
+			// resumable once more input arrives.
+			$path_length = $this->header['pathLength'];
+			$extra_length = $this->header['extraLength'];
+			$comment_length = $this->header['fileCommentLength'];
+			$data = $this->consume_bytes($path_length + $extra_length + $comment_length);
+			if(false === $data) {
+				$this->paused_incomplete_input = true;
+				return false;
+			}
+			$this->header['path'] = $this->sanitize_path(substr($data, 0, $path_length));
+			$this->header['extra'] = (string) substr($data, $path_length, $extra_length);
+			$this->header['fileComment'] = (string) substr($data, $path_length + $extra_length, $comment_length);
+			if('' === $this->header['path']) {
+				$this->set_error('Empty path in central directory entry');
+				return false;
+			}
+		}
 	}
 
+	/**
+	 * Parses the end of central directory record. The call that follows
+	 * a fully parsed record only finishes it and returns to scanning.
+	 *
+	 * @return false|null False when input ran out.
+	 */
+	private function read_end_central_directory_entry()
+	{
+		// isset() and not !empty(): the comment "0" is a valid, non-empty comment.
+		if ($this->header && ( isset($this->header['comment']) || 0 === $this->header['commentLength'] )) {
+			$this->after_record();
+			return;
+		}
+
+		if(!$this->header) {
+			$data = $this->consume_bytes(18);
+			if ($data === false) {
+				$this->paused_incomplete_input = true;
+				return false;
+			}
+			$this->header = unpack(
+				'vdiskNumber/vcentralDirectoryStartDisk/vnumberCentralDirectoryRecordsOnThisDisk/vnumberCentralDirectoryRecords/VcentralDirectorySize/VcentralDirectoryOffset/vcommentLength',
+				$data
+			);
+		}
+
+		if($this->header && !isset($this->header['comment']) && $this->header['commentLength'] > 0) {
+			$comment = $this->consume_bytes($this->header['commentLength']);
+			if(false === $comment) {
+				$this->paused_incomplete_input = true;
+				return false;
+			}
+			$this->header['comment'] = $comment;
+		}
+	}
+
+	/**
+	 * Reads the 4-byte signature of the next record and switches to the
+	 * state that parses that kind of record.
+	 *
+	 * @return false|null False when no signature is available or it is unknown.
+	 */
+	private function scan() {
+		$this->last_record_at = $this->file_bytes_consumed_so_far;
+		$signature = $this->consume_bytes(4);
+		// unpack('V') needs all four bytes, anything shorter is not a signature.
+		if ($signature === false || strlen($signature) < 4) {
+			$this->paused_incomplete_input = true;
+			return false;
+		}
+		$signature = unpack('V', $signature)[1];
+		switch($signature) {
+			case self::SIGNATURE_FILE:
+				$this->state = self::STATE_FILE_ENTRY;
+				break;
+			case self::SIGNATURE_CENTRAL_DIRECTORY:
+				$this->state = self::STATE_CENTRAL_DIRECTORY_ENTRY;
+				break;
+			case self::SIGNATURE_CENTRAL_DIRECTORY_END:
+				$this->state = self::STATE_END_CENTRAL_DIRECTORY_ENTRY;
+				break;
+			default:
+				$this->set_error('Invalid signature ' . $signature);
+				return false;
+		}
+	}
 
 	/**
 	 * Reads a file entry from a zip file.
@@ -56,164 +250,123 @@ public static function readEntry( $fp ) {
 	 *
 	 * @param resource $stream
 	 */
-	protected static function readFileEntry( $stream ): ZipFileEntry {
-		$data = self::read_bytes( $stream, 26 );
-		$data = unpack(
-			'vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength',
-			$data
-		);
-		$path = self::read_bytes( $stream, $data['pathLength'] );
-		$extra = self::read_bytes( $stream, $data['extraLength'] );
-		$bytes = self::read_bytes( $stream, $data['compressedSize'] );
-
-		if ( $data['compressionMethod'] === static::COMPRESSION_DEFLATE ) {
-			try {
-				$bytes = gzinflate( $bytes );
-			} catch ( \Throwable $e ) {
-				// Ignore the error
+	private function read_file_entry()
+	{
+		if(false === $this->read_file_entry_header()) {
+			return false;
+		}
+		if(false === $this->read_file_entry_body_chunk()) {
+			return false;
+		}
+	}
+
+	/**
+	 * Parses the local file header unless it has been parsed already.
+	 *
+	 * @return false|null False when input ran out.
+	 */
+	private function read_file_entry_header() {
+		if (null === $this->header) {
+			$data = $this->consume_bytes(26);
+			if ($data === false) {
+				$this->paused_incomplete_input = true;
+				return false;
+			}
+			$this->header = unpack(
+				'vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength',
+				$data
+			);
+			$this->file_entry_body_bytes_parsed_so_far = 0;
+		}
+
+		// isset() and not empty(): with empty(), an entry named "0" would have
+		// its "path" read again on every call, eating bytes of the file body.
+		if($this->header && !isset($this->header['path'])) {
+			// Path and extra field are taken in a single read so that a pause
+			// cannot happen between them.
+			$path_length = $this->header['pathLength'];
+			$extra_length = $this->header['extraLength'];
+			$data = $this->consume_bytes($path_length + $extra_length);
+			if(false === $data) {
+				$this->paused_incomplete_input = true;
+				return false;
+			}
+			$this->header['path'] = $this->sanitize_path(substr($data, 0, $path_length));
+			$this->header['extra'] = (string) substr($data, $path_length, $extra_length);
+			if($this->header['compressionMethod'] === self::COMPRESSION_DEFLATE) {
+				$this->inflate_handle = inflate_init(ZLIB_ENCODING_RAW);
+			}
+		}
+	}
+
+	/**
+	 * Reads the next part of the current file's body into the chunk
+	 * returned by get_file_body_chunk(), inflating it when the entry is
+	 * deflated. Finishes the record once the whole body has been read.
+	 *
+	 * @param int $max_bytes_to_read Upper bound of compressed bytes to read.
+	 * @return false|null False when input ran out or inflating failed.
+	 */
+	private function read_file_entry_body_chunk($max_bytes_to_read=4096) {
+		$this->file_body_chunk = null;
+
+		$file_body_bytes_left = $this->header['compressedSize'] - $this->file_entry_body_bytes_parsed_so_far;
+		if($file_body_bytes_left <= 0) {
+			$this->after_record();
+			return;
+		}
+
+		$chunk_size = min($max_bytes_to_read, $file_body_bytes_left);
+		$compressed_bytes = $this->consume_bytes($chunk_size);
+		// Without this check a paused reader would hand out empty chunks
+		// forever, because the count of parsed bytes would never grow.
+		if(false === $compressed_bytes) {
+			$this->paused_incomplete_input = true;
+			return false;
+		}
+		$this->file_entry_body_bytes_parsed_so_far += strlen($compressed_bytes);
+
+		if ($this->header['compressionMethod'] === self::COMPRESSION_DEFLATE) {
+			if(!$this->inflate_handle) {
+				$this->inflate_handle = inflate_init(ZLIB_ENCODING_RAW);
+			}
+			$uncompressed_bytes = inflate_add($this->inflate_handle, $compressed_bytes, ZLIB_PARTIAL_FLUSH);
+			if ( $uncompressed_bytes === false || inflate_get_status( $this->inflate_handle ) === false ) {
+				$this->set_error('Failed to inflate');
+				return false;
 			}
+		} else {
+			$uncompressed_bytes = $compressed_bytes;
 		}
 
-		return new ZipFileEntry(
-			$data['versionNeeded'],
-			$data['generalPurpose'],
-			$data['compressionMethod'],
-			$data['lastModifiedTime'],
-			$data['lastModifiedDate'],
-			$data['crc'],
-			$data['compressedSize'],
-			$data['uncompressedSize'],
-			$path,
-			$extra,
-			$bytes
-		);
+		$this->file_body_chunk = $uncompressed_bytes;
 	}
 
-	/**
-	 * 
Reads a central directory entry from a zip file. - * - * The central directory entry is structured as follows: - * - * ``` - * Offset Bytes Description - * 0 4 Central directory file header signature = 0x02014b50 - * 4 2 Version made by - * 6 2 Version needed to extract (minimum) - * 8 2 General purpose bit flag - * 10 2 Compression method - * 12 2 File last modification time - * 14 2 File last modification date - * 16 4 CRC-32 of uncompressed data - * 20 4 Compressed size (or 0xffffffff for ZIP64) - * 24 4 Uncompressed size (or 0xffffffff for ZIP64) - * 28 2 File name length (n) - * 30 2 Extra field length (m) - * 32 2 File comment length (k) - * 34 2 Disk number where file starts (or 0xffff for ZIP64) - * 36 2 Internal file attributes - * 38 4 External file attributes - * 42 4 Relative offset of local file header (or 0xffffffff for ZIP64). This is the number of bytes between the start of the first disk on which the file occurs, and the start of the local file header. This allows software reading the central directory to locate the position of the file inside the ZIP file. 
-	 * 46 n File name
-	 * 46+n m Extra field
-	 * 46+n+m k File comment
-	 * ```
-	 *
-	 * @param resource stream
-	 */
-	protected static function readCentralDirectoryEntry( $stream ): ZipCentralDirectoryEntry {
-		$data = static::read_bytes( $stream, 42 );
-		$data = unpack(
-			'vversionCreated/vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength/vfileCommentLength/vdiskNumber/vinternalAttributes/VexternalAttributes/VfirstByteAt',
-			$data
-		);
-		$path = static::read_bytes( $stream, $data['pathLength'] );
-		$extra = static::read_bytes( $stream, $data['extraLength'] );
-		$fileComment = static::read_bytes( $stream, $data['fileCommentLength'] );
-
-		return new ZipCentralDirectoryEntry(
-			$data['versionCreated'],
-			$data['versionNeeded'],
-			$data['generalPurpose'],
-			$data['compressionMethod'],
-			$data['lastModifiedTime'],
-			$data['lastModifiedDate'],
-			$data['crc'],
-			$data['compressedSize'],
-			$data['uncompressedSize'],
-			$data['diskNumber'],
-			$data['internalAttributes'],
-			$data['externalAttributes'],
-			$data['firstByteAt'],
-			$data['firstByteAt'] + 30 + $data['pathLength'] + $data['fileCommentLength'] + $data['extraLength'] + $data['compressionMethod'] - 1,
-			$path,
-			$extra,
-			$fileComment
-		);
+	private function set_error($message) {
+		// An error is final, so the reader is no longer waiting for input.
+		$this->state = self::STATE_ERROR;
+		$this->error_message = $message;
+		$this->paused_incomplete_input = false;
 	}
 
 	/**
-	 * Reads the end of central directory entry from a zip file.
+	 * Normalizes the parsed path to prevent directory traversal,
+	 * a.k.a zip slip attacks.
 	 *
-	 * The end of central directory entry is structured as follows:
+	 * In ZIP, paths are arbitrary byte sequences. Nothing prevents
+	 * a ZIP file from containing a path such as /etc/passwd or
+	 * ../../../../etc/passwd.
 	 *
-	 * ```
-	 * Offset Bytes Description[33]
-	 * 0 4 End of central directory signature = 0x06054b50
-	 * 4 2 Number of this disk (or 0xffff for ZIP64)
-	 * 6 2 Disk where central directory starts (or 0xffff for ZIP64)
-	 * 8 2 Number of central directory records on this disk (or 0xffff for ZIP64)
-	 * 10 2 Total number of central directory records (or 0xffff for ZIP64)
-	 * 12 4 Size of central directory (bytes) (or 0xffffffff for ZIP64)
-	 * 16 4 Offset of start of central directory, relative to start of archive (or 0xffffffff for ZIP64)
-	 * 20 2 Comment length (n)
-	 * 22 n Comment
-	 * ```
-	 *
-	 * @param resource $stream
+	 * This function normalizes paths found in the ZIP file. It removes
+	 * NUL bytes and drops every empty, "." and ".." segment, so the
+	 * result is always a relative path that cannot climb out of the
+	 * directory it is later joined with. A ".." segment is dropped, not
+	 * resolved: "a/../b" becomes "a/b". A trailing slash, which marks
+	 * directories, is preserved.
+	 *
+	 * @TODO: Scrutinize the implementation of this function. Consider
+	 *        unicode characters in the path, including ones that are
+	 *        just embellishments of the following character. Consider
+	 *        the impact of **all** seemingly "invalid" byte sequences,
+	 *        e.g. spaces, ASCII control characters, etc. What will the
+	 *        OS do when it receives a path containing .{null byte}./etc/passwd?
+	 *        Backslashes are left untouched here, although Windows
+	 *        treats them as directory separators.
+	 *
+	 * @param string|false $path Raw path bytes; false is treated as an empty path.
+	 * @return string The normalized path, possibly empty.
 	 */
-	protected static function readEndCentralDirectoryEntry( $stream ): ZipEndCentralDirectoryEntry {
-		$data = static::read_bytes( $stream, 18 );
-		$data = unpack(
-			'vdiskNumber/vcentralDirectoryStartDisk/vnumberCentralDirectoryRecordsOnThisDisk/vnumberCentralDirectoryRecords/VcentralDirectorySize/VcentralDirectoryOffset/vcommentLength',
-			$data
-		);
-
-		return new ZipEndCentralDirectoryEntry(
-			$data['diskNumber'],
-			$data['centralDirectoryStartDisk'],
-			$data['numberCentralDirectoryRecordsOnThisDisk'],
-			$data['numberCentralDirectoryRecords'],
-			$data['centralDirectorySize'],
-			$data['centralDirectoryOffset'],
-			static::read_bytes( $stream, $data['commentLength'] )
-		);
+	private function sanitize_path($path) {
+		// No file system accepts NUL inside a name. Removing it first keeps
+		// sequences such as ".\0." from slipping past the segment check below.
+		$path = str_replace("\0", '', (string) $path);
+		$is_directory = '/' === substr($path, -1);
+
+		// Work on whole segments instead of regular expressions: a single
+		// regex pass misses overlapping matches ("a/../../b" turned into
+		// "a/../b"), a trailing "/..", and absolute paths.
+		$safe_segments = array();
+		foreach(explode('/', $path) as $segment) {
+			if('' === $segment || '.' === $segment || '..' === $segment) {
+				continue;
+			}
+			$safe_segments[] = $segment;
+		}
+
+		$path = implode('/', $safe_segments);
+		if($is_directory && '' !== $path) {
+			$path .= '/';
+		}
+		return $path;
 	}
 
-	/**
-	 * Reads a fixed number of bytes from a stream.
-	 * Unlike fread(), this function will block until enough bytes are available.
-	 *
-	 * @param $stream
-	 * @param $length
-	 *
-	 * @return false|string
-	 */
-	protected static function read_bytes( $stream, $length ) {
-		if ( $length === 0 ) {
+	/**
+	 * Takes exactly $n bytes off the front of the input.
+	 *
+	 * Bytes are pulled from the byte reader chunk by chunk until at least
+	 * $n of them are buffered. When the reader cannot deliver enough,
+	 * nothing is consumed and the buffered bytes are kept for a retry.
+	 *
+	 * @param int $n Number of bytes to consume.
+	 * @return string|false The $n bytes, or false when they are not available.
+	 *                      In that case the state becomes STATE_COMPLETE if the
+	 *                      byte reader is finished; otherwise the reader is
+	 *                      flagged as paused at incomplete input.
+	 */
+	private function consume_bytes($n) {
+		if(0 === $n) {
 			return '';
 		}
 
-		$data = '';
-		$remaining_length = $length;
-		while ( $remaining_length > 0 ) {
-			$chunk = fread( $stream, $remaining_length );
-			if ( false === $chunk || ( '' === $chunk && feof( $stream ) ) ) {
-				return strlen( $data ) ? $data : false;
+		// A loop and not a single attempt: one chunk of the byte reader may be
+		// smaller than $n. Returning a short string while still advancing the
+		// offset by $n would desynchronize the parser from the input.
+		while (strlen($this->byte_buffer) < $n) {
+			if (!$this->byte_reader->next_bytes()) {
+				if ($this->byte_reader->is_finished()) {
+					$this->state = self::STATE_COMPLETE;
+				} else {
+					$this->paused_incomplete_input = true;
+				}
+				return false;
 			}
-			$remaining_length -= strlen( $chunk );
-			$data .= $chunk;
+			$this->byte_buffer .= $this->byte_reader->get_bytes();
 		}
 
-		return $data;
+		$bytes = substr($this->byte_buffer, 0, $n);
+		$this->byte_buffer = substr($this->byte_buffer, $n);
+		$this->file_bytes_consumed_so_far += $n;
+		return $bytes;
 	}
+
 }
+
diff --git a/src/WordPress/Zip/functions.php b/src/WordPress/Zip/functions.php
deleted file mode 100644
index 226111df..00000000
--- a/src/WordPress/Zip/functions.php
+++ /dev/null
@@ -1,36 +0,0 @@
-isFileEntry() ) {
-			continue;
-		}
-
-		$path = Path::canonicalize( $to_path . '/' . $entry->path );
-		$parent = Path::getDirectory( $path );
-		if ( ! is_dir( $parent ) ) {
-			if(is_file($parent)) {
-				unlink($parent);
-			}
-			mkdir( $parent, 0777, true );
-		}
-
-		if ( $entry->isDirectory ) {
-			if ( ! is_dir( $path ) ) {
-				mkdir( $path, 0777, true );
-			}
-		} else {
-			file_put_contents( $path, $entry->bytes );
-		}
-	}
-
-	return feof( $fp ) ? 1 : 0;
-}