Single file: Guard against partial cleanup of extracted files (#32649)
authorSwaroop Sridhar <Swaroop.Sridhar@microsoft.com>
Tue, 25 Feb 2020 21:21:20 +0000 (13:21 -0800)
committerGitHub <noreply@github.com>
Tue, 25 Feb 2020 21:21:20 +0000 (13:21 -0800)
This change fixes https://github.com/dotnet/runtime/issues/3778
This change is mainly targeted to be servicing fix for .net core 3.1.

When executing single-file apps, if a pre-existing extraction exists, the contents of the extraction directory are now verified.
If files are missing, they are recovered.

**Extraction Algorithm**

`ExtractionDir` = `$DOTNET_BUNDLE_EXTRACT_BASE_DIR/<app>/<bundle-id>`
`WorkingDir` = `$DOTNET_BUNDLE_EXTRACT_BASE_DIR/<app>/<process-id>`

If `ExtractionDir` does not exist, then

Create `WorkingDir`, and extract bundle contents within it
Attempt to rename `WorkingDir` as `ExtractionDir`
If the rename succeeds, continue execution of the app
If not, another process completed extraction; Verify and reuse this extraction (as in step 2).
If `ExtractionDir` exists, then verify that it contains all files listed in the manifest.

If all contents are available,
Remove `WorkingDir`
Continue execution of the app, reusing the contents.

If certain files are missing within `ExtractionDir`, then
For each missing file, do the following individually
  Extract the files within `WorkingDir`
  Create sub-paths within `ExtractionDir` if necessary
  Rename the file from `WorkingDir/path/<file>` to `ExtractionDir/path/<file>` unless `ExtractionDir/path/<file>` exists (extracted there by another process in the meantime)
Remove `WorkingDir`
Continue execution of the app.

All of the renames above are done with appropriate retries to circumvent interference from anti-virus apps.

**Startup time impact**
* Console HelloWorld execution time:
    * Framework dependent app: Windows/Linux No measurable difference
    * Self-contained app: Windows: ~10ms additional
    * Self-contained app: Linux: No measurable difference
* Greenshot Startup:
    * Self-contained Windows: No noticiable/measurable difference
* NugetPackageExplorer Startup:
    * Self-contained Windows: No noticiable/measurable difference

src/installer/corehost/cli/apphost/bundle/dir_utils.cpp
src/installer/corehost/cli/apphost/bundle/dir_utils.h
src/installer/corehost/cli/apphost/bundle/extractor.cpp
src/installer/corehost/cli/apphost/bundle/extractor.h
src/installer/corehost/cli/apphost/bundle/runner.cpp
src/installer/test/Microsoft.NET.HostModel.Tests/AppHost.Bundle.Tests/BundleExtractToSpecificPath.cs

index 2f067b7..eede952 100644 (file)
@@ -56,7 +56,10 @@ void dir_utils_t::remove_directory_tree(const pal::string_t& path)
 
     for (const pal::string_t &dir : dirs)
     {
-        remove_directory_tree(dir);
+        pal::string_t dir_path = path;
+        append_path(&dir_path, dir.c_str());
+
+        remove_directory_tree(dir_path);
     }
 
     std::vector<pal::string_t> files;
@@ -64,9 +67,12 @@ void dir_utils_t::remove_directory_tree(const pal::string_t& path)
 
     for (const pal::string_t &file : files)
     {
-        if (!pal::remove(file.c_str()))
+        pal::string_t file_path = path;
+        append_path(&file_path, file.c_str());
+
+        if (!pal::remove(file_path.c_str()))
         {
-            trace::warning(_X("Failed to remove temporary file [%s]."), file.c_str());
+            trace::warning(_X("Failed to remove temporary file [%s]."), file_path.c_str());
         }
     }
 
@@ -91,3 +97,46 @@ void dir_utils_t::fixup_path_separator(pal::string_t& path)
         }
     }
 }
+
+// Retry the rename operation with some wait in between the attempts.
+// This is an attempt to workaround for possible file locking caused by AV software.
+
+bool dir_utils_t::rename_with_retries(pal::string_t& old_name, pal::string_t& new_name, bool& dir_exists)
+{
+    for (int retry_count=0; retry_count < 500; retry_count++)
+    {
+        if (pal::rename(old_name.c_str(), new_name.c_str()) == 0)
+        {
+            return true;
+        }
+        bool should_retry = errno == EACCES;
+
+        if (pal::directory_exists(new_name))
+        {
+            // Check directory_exists() on each run, because a concurrent process may have
+            // created the new_name directory.
+            //
+            // The rename() operation above fails with errono == EACCESS if 
+            // * Directory new_name already exists, or
+            // * Paths are invalid paths, or
+            // * Due to locking/permission problems.
+            // Therefore, we need to perform the directory_exists() check again.
+
+            dir_exists = true;
+            return false;
+        }
+
+        if (should_retry)
+        {
+            trace::info(_X("Retrying Rename [%s] to [%s] due to EACCES error"), old_name.c_str(), new_name.c_str());
+            pal::sleep(100);
+            continue;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    return false;
+}
index 8824153..ffe0acf 100644 (file)
@@ -17,6 +17,7 @@ namespace bundle
         static void remove_directory_tree(const pal::string_t &path);
         static void create_directory_tree(const pal::string_t &path);
         static void fixup_path_separator(pal::string_t& path);
+        static bool rename_with_retries(pal::string_t& old_name, pal::string_t& new_name, bool &new_dir_exists);
     };
 }
 
index 6fd9b5f..71bb225 100644 (file)
 
 using namespace bundle;
 
-// Compute the final extraction location as:
-// m_extraction_dir = $DOTNET_BUNDLE_EXTRACT_BASE_DIR/<app>/<id>/...
-//
-// If DOTNET_BUNDLE_EXTRACT_BASE_DIR is not set in the environment, the 
-// base directory defaults to $TMPDIR/.net
-void extractor_t::determine_extraction_dir()
+pal::string_t& extractor_t::extraction_dir()
 {
-    if (!pal::getenv(_X("DOTNET_BUNDLE_EXTRACT_BASE_DIR"), &m_extraction_dir))
+    if (m_extraction_dir.empty())
     {
-        if (!pal::get_default_bundle_extraction_base_dir(m_extraction_dir))
+        // Compute the final extraction location as:
+        // m_extraction_dir = $DOTNET_BUNDLE_EXTRACT_BASE_DIR/<app>/<id>/...   
+        //     
+        // If DOTNET_BUNDLE_EXTRACT_BASE_DIR is not set in the environment, 
+        // a default is choosen within the temporary directory.
+
+        if (!pal::getenv(_X("DOTNET_BUNDLE_EXTRACT_BASE_DIR"), &m_extraction_dir))
         {
-            trace::error(_X("Failure processing application bundle."));
-            trace::error(_X("Failed to determine location for extracting embedded files."));
-            trace::error(_X("DOTNET_BUNDLE_EXTRACT_BASE_DIR is not set, and a read-write temp-directory couldn't be created."));
-            throw StatusCode::BundleExtractionFailure;
+            if (!pal::get_default_bundle_extraction_base_dir(m_extraction_dir))
+            {
+                trace::error(_X("Failure processing application bundle."));
+                trace::error(_X("Failed to determine location for extracting embedded files."));
+                trace::error(_X("DOTNET_BUNDLE_EXTRACT_BASE_DIR is not set, and a read-write temp-directory couldn't be created."));
+                throw StatusCode::BundleExtractionFailure;
+            }
         }
-    }
 
-    pal::string_t host_name = strip_executable_ext(get_filename(m_bundle_path));
-    append_path(&m_extraction_dir, host_name.c_str());
-    append_path(&m_extraction_dir, m_bundle_id.c_str());
+        pal::string_t host_name = strip_executable_ext(get_filename(m_bundle_path));
+        append_path(&m_extraction_dir, host_name.c_str());
+        append_path(&m_extraction_dir, m_bundle_id.c_str());
+
+        trace::info(_X("Files embedded within the bundled will be extracted to [%s] directory."), m_extraction_dir.c_str());
+    }
 
-    trace::info(_X("Files embedded within the bundled will be extracted to [%s] directory."), m_extraction_dir.c_str());
+    return m_extraction_dir;
 }
 
-// Compute the working extraction location for this process, before the 
-// extracted files are committed to the final location
-// m_working_extraction_dir = $DOTNET_BUNDLE_EXTRACT_BASE_DIR/<app>/<proc-id-hex>
-void extractor_t::determine_working_extraction_dir()
+pal::string_t& extractor_t::working_extraction_dir()
 {
-    m_working_extraction_dir = get_directory(extraction_dir());
-    pal::char_t pid[32];
-    pal::snwprintf(pid, 32, _X("%x"), pal::get_pid());
-    append_path(&m_working_extraction_dir, pid);
+    if (m_working_extraction_dir.empty())
+    {
+        // Compute the working extraction location for this process, 
+        // before the extracted files are committed to the final location
+        // working_extraction_dir = $DOTNET_BUNDLE_EXTRACT_BASE_DIR/<app>/<proc-id-hex>
 
-    dir_utils_t::create_directory_tree(m_working_extraction_dir);
+        m_working_extraction_dir = get_directory(extraction_dir());
+        pal::char_t pid[32];
+        pal::snwprintf(pid, 32, _X("%x"), pal::get_pid());
+        append_path(&m_working_extraction_dir, pid);
+
+        trace::info(_X("Temporary directory used to extract bundled files is [%s]."), m_working_extraction_dir.c_str());
+    }
 
-    trace::info(_X("Temporary directory used to extract bundled files is [%s]."), m_working_extraction_dir.c_str());
+    return m_working_extraction_dir;
 }
 
 // Create a file to be extracted out on disk, including any intermediate sub-directories.
 FILE* extractor_t::create_extraction_file(const pal::string_t& relative_path)
 {
-    pal::string_t file_path = m_working_extraction_dir;
+    pal::string_t file_path = working_extraction_dir();
     append_path(&file_path, relative_path.c_str());
 
-    // m_working_extraction_dir is assumed to exist, 
+    // working_extraction_dir is assumed to exist, 
     // so we only create sub-directories if relative_path contains directories
     if (dir_utils_t::has_dirs_in_path(relative_path))
     {
@@ -92,29 +102,6 @@ void extractor_t::extract(const file_entry_t &entry, reader_t &reader)
     fclose(file);
 }
 
-pal::string_t& extractor_t::extraction_dir()
-{
-    if (m_extraction_dir.empty())
-    {
-        determine_extraction_dir();
-    }
-
-    return m_extraction_dir;
-}
-
-bool extractor_t::can_reuse_extraction()
-{
-    // In this version, the extracted files are assumed to be 
-    // correct by construction.
-    // 
-    // Files embedded in the bundle are first extracted to m_working_extraction_dir
-    // Once all files are successfully extracted, the extraction location is 
-    // committed (renamed) to m_extraction_dir. Therefore, the presence of 
-    // m_extraction_dir means that the files are pre-extracted. 
-
-    return pal::directory_exists(extraction_dir());
-}
-
 void extractor_t::begin()
 {
     // Files are extracted to a specific deterministic location on disk
@@ -126,58 +113,138 @@ void extractor_t::begin()
     //
     // In order to solve these issues, we implement a extraction as a two-phase approach:
     // 1) Files embedded in a bundle are extracted to a process-specific temporary
-    //    extraction location (m_working_extraction_dir)
-    // 2) Upon successful extraction, m_working_extraction_dir is renamed to the actual
-    //    extraction location (m_extraction_dir)
+    //    extraction location (working_extraction_dir)
+    // 2) Upon successful extraction, working_extraction_dir is renamed to the actual
+    //    extraction location (extraction_dir)
     //    
     // This effectively creates a file-lock to protect against races and failed extractions.
 
-    determine_working_extraction_dir();
+
+    dir_utils_t::create_directory_tree(working_extraction_dir());
+}
+
+void extractor_t::clean()
+{
+    dir_utils_t::remove_directory_tree(working_extraction_dir());
 }
 
-void extractor_t::commit()
+void extractor_t::commit_dir()
 {
-    // Commit files to the final extraction directory
+    // Commit an entire new extraction to the final extraction directory
     // Retry the move operation with some wait in between the attempts. This is to workaround for possible file locking
     // caused by AV software. Basically the extraction process above writes a bunch of executable files to disk
     // and some AV software may decide to scan them on write. If this happens the files will be locked which blocks
     // our ablity to move them.
-    int retry_count = 500;
-    while (true)
+
+    bool extracted_by_concurrent_process = false;
+    bool extracted_by_current_process =
+        dir_utils_t::rename_with_retries(working_extraction_dir(), extraction_dir(), extracted_by_concurrent_process);
+
+    if (extracted_by_concurrent_process)
     {
-        if (pal::rename(m_working_extraction_dir.c_str(), m_extraction_dir.c_str()) == 0)
-            break;
+        // Another process successfully extracted the dependencies
+        trace::info(_X("Extraction completed by another process, aborting current extraction."));
+        clean();
+    }
 
-        bool should_retry = errno == EACCES;
-        if (can_reuse_extraction())
-        {
-            // Another process successfully extracted the dependencies
-            trace::info(_X("Extraction completed by another process, aborting current extraction."));
+    if (!extracted_by_current_process && !extracted_by_concurrent_process)
+    {
+        trace::error(_X("Failure processing application bundle."));
+        trace::error(_X("Failed to commit extracted files to directory [%s]."), extraction_dir().c_str());
+        throw StatusCode::BundleExtractionFailure;
+    }
 
-            dir_utils_t::remove_directory_tree(m_working_extraction_dir);
-            break;
-        }
+    trace::info(_X("Completed new extraction."));
+}
 
-        if (should_retry && (retry_count--) > 0)
-        {
-            trace::info(_X("Retrying extraction due to EACCES trying to rename the extraction folder to [%s]."), m_extraction_dir.c_str());
-            pal::sleep(100);
-            continue;
-        }
-        else
-        {
-            trace::error(_X("Failure processing application bundle."));
-            trace::error(_X("Failed to commit extracted files to directory [%s]."), m_extraction_dir.c_str());
-            throw StatusCode::BundleExtractionFailure;
-        }
+void extractor_t::commit_file(const pal::string_t& relative_path)
+{
+    // Commit individual files to the final extraction directory.
+
+    pal::string_t working_file_path = working_extraction_dir();
+    append_path(&working_file_path, relative_path.c_str());
+
+    pal::string_t final_file_path = extraction_dir();
+    append_path(&final_file_path, relative_path.c_str());
+
+    if (dir_utils_t::has_dirs_in_path(relative_path))
+    {
+        dir_utils_t::create_directory_tree(get_directory(final_file_path));
     }
+
+    bool extracted_by_concurrent_process = false;
+    bool extracted_by_current_process =
+        dir_utils_t::rename_with_retries(working_file_path, final_file_path, extracted_by_concurrent_process);
+
+    if (extracted_by_concurrent_process)
+    {
+        // Another process successfully extracted the dependencies
+        trace::info(_X("Extraction completed by another process, aborting current extraction."));
+    }
+
+    if (!extracted_by_current_process && !extracted_by_concurrent_process)
+    {
+        trace::error(_X("Failure processing application bundle."));
+        trace::error(_X("Failed to commit extracted files to directory [%s]."), extraction_dir().c_str());
+        throw StatusCode::BundleExtractionFailure;
+    }
+
+    trace::info(_X("Extraction recovered [%s]"), relative_path.c_str());
 }
 
-void extractor_t::extract(const manifest_t& manifest, reader_t& reader)
+void extractor_t::extract_new(reader_t& reader)
 {
     begin();
-    for (const file_entry_t& entry : manifest.files) {
+    for (const file_entry_t& entry : m_manifest.files) 
+    {
         extract(entry, reader);
     }
-    commit();
+    commit_dir();
+}
+
+// Verify an existing extraction contains all files listed in the bundle manifest.
+// If some files are missing, extract them individually.
+void extractor_t::verify_recover_extraction(reader_t& reader)
+{
+    pal::string_t& ext_dir = extraction_dir();
+    bool recovered = false;
+
+    for (const file_entry_t& entry : m_manifest.files)
+    {
+        pal::string_t file_path = ext_dir;
+        append_path(&file_path, entry.relative_path().c_str());
+
+        if (!pal::file_exists(file_path))
+        {
+            if (!recovered)
+            {
+                recovered = true;
+                begin();
+            }
+
+            extract(entry, reader);
+            commit_file(entry.relative_path());
+        }
+    }
+
+    if (recovered)
+    {
+        clean();
+    }
+}
+
+pal::string_t& extractor_t::extract(reader_t& reader)
+{
+    if (pal::directory_exists(extraction_dir()))
+    {
+        trace::info(_X("Reusing existing extraction of application bundle."));
+        verify_recover_extraction(reader);
+    }
+    else
+    {
+        trace::info(_X("Starting new extraction of application bundle."));
+        extract_new(reader);
+    }
+
+    return m_extraction_dir;
 }
index eb45e2c..73b75ec 100644 (file)
@@ -13,32 +13,39 @@ namespace bundle
     class extractor_t
     {
     public:
-        extractor_t(const pal::string_t &bundle_id, const pal::string_t& bundle_path)
-            :m_extraction_dir(), m_working_extraction_dir()
+        extractor_t(const pal::string_t &bundle_id,
+                    const pal::string_t& bundle_path,
+                    const manifest_t &manifest)
+            :m_extraction_dir(),
+             m_working_extraction_dir(),
+             m_manifest(manifest)
         {
             m_bundle_id = bundle_id;
             m_bundle_path = bundle_path;
         }
 
-        pal::string_t& extraction_dir();
-        bool can_reuse_extraction();
-
-        void extract(const manifest_t &manifest, reader_t& reader);
+        pal::string_t& extract(reader_t& reader);
 
     private:
-        void determine_extraction_dir();
-        void determine_working_extraction_dir();
+        pal::string_t& extraction_dir();
+        pal::string_t& working_extraction_dir();
+
+        void extract_new(reader_t& reader);
+        void verify_recover_extraction(reader_t& reader);
 
         FILE* create_extraction_file(const pal::string_t& relative_path);
+        void extract(const file_entry_t& entry, reader_t& reader);
 
         void begin();
-        void extract(const file_entry_t& entry, reader_t& reader);
-        void commit();
+        void commit_file(const pal::string_t& relative_path);
+        void commit_dir();
+        void clean();
 
         pal::string_t m_bundle_id;
         pal::string_t m_bundle_path;
         pal::string_t m_extraction_dir;
         pal::string_t m_working_extraction_dir;
+        const manifest_t& m_manifest;
     };
 }
 
index 1486204..b8dc20e 100644 (file)
@@ -46,19 +46,15 @@ StatusCode runner_t::extract()
         reader.set_offset(marker_t::header_offset());
         header_t header = header_t::read(reader);
 
-        extractor_t extractor(header.bundle_id(), m_bundle_path);
-        m_extraction_dir = extractor.extraction_dir();
+        // Read the bundle manifest
+        // Reader is at the correct offset
+        manifest_t manifest = manifest_t::read(reader, header.num_embedded_files());
 
-        // Determine if embedded files are already extracted, and available for reuse
-        if (!extractor.can_reuse_extraction())
-        {
-            manifest_t manifest = manifest_t::read(reader, header.num_embedded_files());
-
-            extractor.extract(manifest, reader);
-        }
+        // Extract the files 
+        extractor_t extractor(header.bundle_id(), m_bundle_path, manifest);
+        m_extraction_dir = extractor.extract(reader);
 
         unmap_host();
-
         return StatusCode::Success;
     }
     catch (StatusCode e)
index 5812d2d..c6e2322 100644 (file)
@@ -118,6 +118,58 @@ namespace AppHost.Bundle.Tests
             extractDir.Should().NotBeModifiedAfter(firstWriteTime);
         }
 
+        [Fact]
+        private void Bundle_extraction_can_recover_missing_files()
+        {
+            var fixture = sharedTestState.TestFixture.Copy();
+            var hostName = BundleHelper.GetHostName(fixture);
+            var appName = Path.GetFileNameWithoutExtension(hostName);
+            string publishPath = BundleHelper.GetPublishPath(fixture);
+
+            // Publish the bundle
+            var bundleDir = BundleHelper.GetBundleDir(fixture);
+            var bundler = new Microsoft.NET.HostModel.Bundle.Bundler(hostName, bundleDir.FullName);
+            string singleFile = bundler.GenerateBundle(publishPath);
+
+            // Compute bundled files
+            List<string> bundledFiles = bundler.BundleManifest.Files.Select(file => file.RelativePath).ToList();
+
+            // Create a directory for extraction.
+            var extractBaseDir = BundleHelper.GetExtractDir(fixture);
+            string extractPath = Path.Combine(extractBaseDir.FullName, appName, bundler.BundleManifest.BundleID);
+            var extractDir = new DirectoryInfo(extractPath);
+
+            // Run the bunded app for the first time, and extract files to 
+            // $DOTNET_BUNDLE_EXTRACT_BASE_DIR/<app>/bundle-id
+            Command.Create(singleFile)
+                .CaptureStdErr()
+                .CaptureStdOut()
+                .EnvironmentVariable(BundleHelper.DotnetBundleExtractBaseEnvVariable, extractBaseDir.FullName)
+                .Execute()
+                .Should()
+                .Pass()
+                .And
+                .HaveStdOutContaining("Hello World");
+
+            bundledFiles.ForEach(file => File.Delete(Path.Combine(extractPath, file)));
+
+            extractDir.Should().Exist();
+            extractDir.Should().NotHaveFiles(bundledFiles);
+
+            // Run the bundled app again (recover deleted files)
+            Command.Create(singleFile)
+                .CaptureStdErr()
+                .CaptureStdOut()
+                .EnvironmentVariable(BundleHelper.DotnetBundleExtractBaseEnvVariable, extractBaseDir.FullName)
+                .Execute()
+                .Should()
+                .Pass()
+                .And
+                .HaveStdOutContaining("Hello World");
+
+            extractDir.Should().HaveFiles(bundledFiles);
+        }
+
 
         public class SharedTestState : IDisposable
         {