23 "github.com/Microsoft/go-winio"
24 "github.com/Microsoft/go-winio/archive/tar"
25 "github.com/Microsoft/go-winio/backuptar"
26 "github.com/Microsoft/hcsshim"
27 "github.com/Sirupsen/logrus"
28 "github.com/docker/docker/daemon/graphdriver"
29 "github.com/docker/docker/pkg/archive"
30 "github.com/docker/docker/pkg/idtools"
31 "github.com/docker/docker/pkg/ioutils"
32 "github.com/docker/docker/pkg/longpath"
33 "github.com/docker/docker/pkg/reexec"
34 "github.com/docker/docker/pkg/system"
35 units "github.com/docker/go-units"
36 "golang.org/x/sys/windows"
39 // filterDriver is an HCSShim driver type for the Windows Filter driver.
40 const filterDriver = 1
43 // mutatedFiles is a list of files that are mutated by the import process
44 // and must be backed up and restored.
45 mutatedFiles = map[string]string{
46 "UtilityVM/Files/EFI/Microsoft/Boot/BCD": "bcd.bak",
47 "UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG": "bcd.log.bak",
48 "UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG1": "bcd.log1.bak",
49 "UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG2": "bcd.log2.bak",
54 // init registers the windowsfilter graph driver with the graphdriver registry.
56 graphdriver.Register("windowsfilter", InitFilter)
57 // DOCKER_WINDOWSFILTER_NOREEXEC allows for inline processing which makes
58 // debugging issues in the re-exec codepath significantly easier.
59 if os.Getenv("DOCKER_WINDOWSFILTER_NOREEXEC") != "" {
60 logrus.Warnf("WindowsGraphDriver is set to not re-exec. This is intended for debugging purposes only.")
63 reexec.Register("docker-windows-write-layer", writeLayerReexec)
70 func (c *checker) IsMounted(path string) bool {
74 // Driver represents a windows graph driver.
76 // info stores the shim driver information
77 info hcsshim.DriverInfo
78 ctr *graphdriver.RefCounter
79 // it is safe for windows to use a cache here because it does not support
80 // restoring containers when the daemon dies.
82 cache map[string]string
85 // InitFilter returns a new Windows storage filter driver.
86 func InitFilter(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
87 logrus.Debugf("WindowsGraphDriver InitFilter at %s", home)
89 fsType, err := getFileSystemType(string(home[0]))
93 if strings.ToLower(fsType) == "refs" {
94 return nil, fmt.Errorf("%s is on an ReFS volume - ReFS volumes are not supported", home)
97 if err := idtools.MkdirAllAs(home, 0700, 0, 0); err != nil {
98 return nil, fmt.Errorf("windowsfilter failed to create '%s': %v", home, err)
102 info: hcsshim.DriverInfo{
104 Flavour: filterDriver,
106 cache: make(map[string]string),
107 ctr: graphdriver.NewRefCounter(&checker{}),
112 // win32FromHresult is a helper function to get the win32 error code from an HRESULT
113 func win32FromHresult(hr uintptr) uintptr {
114 if hr&0x1fff0000 == 0x00070000 {
120 // getFileSystemType obtains the type of a file system through GetVolumeInformation
121 // https://msdn.microsoft.com/en-us/library/windows/desktop/aa364993(v=vs.85).aspx
122 func getFileSystemType(drive string) (fsType string, hr error) {
124 modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
125 procGetVolumeInformation = modkernel32.NewProc("GetVolumeInformationW")
126 buf = make([]uint16, 255)
127 size = syscall.MAX_PATH + 1
130 hr = errors.New("getFileSystemType must be called with a drive letter")
134 n := uintptr(unsafe.Pointer(nil))
135 r0, _, _ := syscall.Syscall9(procGetVolumeInformation.Addr(), 8, uintptr(unsafe.Pointer(syscall.StringToUTF16Ptr(drive))), n, n, n, n, n, uintptr(unsafe.Pointer(&buf[0])), uintptr(size), 0)
137 hr = syscall.Errno(win32FromHresult(r0))
139 fsType = syscall.UTF16ToString(buf)
143 // String returns the string representation of a driver. This should match
144 // the name the graph driver has been registered with.
145 func (d *Driver) String() string {
146 return "windowsfilter"
149 // Status returns the status of the driver.
150 func (d *Driver) Status() [][2]string {
156 // panicIfUsedByLcow does exactly what it says.
157 // TODO @jhowardmsft - this is a temporary measure for the bring-up of
158 // Linux containers on Windows. It is a failsafe to ensure that the right
159 // graphdriver is used.
160 func panicIfUsedByLcow() {
161 if system.LCOWSupported() {
162 panic("inconsistency - windowsfilter graphdriver should not be used when in LCOW mode")
166 // Exists returns true if the given id is registered with this driver.
167 func (d *Driver) Exists(id string) bool {
169 rID, err := d.resolveID(id)
173 result, err := hcsshim.LayerExists(d.info, rID)
180 // CreateReadWrite creates a layer that is writable for use as a container
182 func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
185 return d.create(id, parent, opts.MountLabel, false, opts.StorageOpt)
187 return d.create(id, parent, "", false, nil)
190 // Create creates a new read-only layer with the given id.
191 func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
194 return d.create(id, parent, opts.MountLabel, true, opts.StorageOpt)
196 return d.create(id, parent, "", true, nil)
199 func (d *Driver) create(id, parent, mountLabel string, readOnly bool, storageOpt map[string]string) error {
200 rPId, err := d.resolveID(parent)
205 parentChain, err := d.getLayerChain(rPId)
210 var layerChain []string
213 parentPath, err := hcsshim.GetLayerMountPath(d.info, rPId)
217 if _, err := os.Stat(filepath.Join(parentPath, "Files")); err == nil {
218 // This is a legitimate parent layer (not the empty "-init" layer),
219 // so include it in the layer chain.
220 layerChain = []string{parentPath}
224 layerChain = append(layerChain, parentChain...)
227 if err := hcsshim.CreateLayer(d.info, id, rPId); err != nil {
231 var parentPath string
232 if len(layerChain) != 0 {
233 parentPath = layerChain[0]
236 if err := hcsshim.CreateSandboxLayer(d.info, id, parentPath, layerChain); err != nil {
240 storageOptions, err := parseStorageOpt(storageOpt)
242 return fmt.Errorf("Failed to parse storage options - %s", err)
245 if storageOptions.size != 0 {
246 if err := hcsshim.ExpandSandboxSize(d.info, id, storageOptions.size); err != nil {
252 if _, err := os.Lstat(d.dir(parent)); err != nil {
253 if err2 := hcsshim.DestroyLayer(d.info, id); err2 != nil {
254 logrus.Warnf("Failed to DestroyLayer %s: %s", id, err2)
256 return fmt.Errorf("Cannot create layer with missing parent %s: %s", parent, err)
259 if err := d.setLayerChain(id, layerChain); err != nil {
260 if err2 := hcsshim.DestroyLayer(d.info, id); err2 != nil {
261 logrus.Warnf("Failed to DestroyLayer %s: %s", id, err2)
269 // dir returns the absolute path to the layer.
270 func (d *Driver) dir(id string) string {
271 return filepath.Join(d.info.HomeDir, filepath.Base(id))
274 // Remove unmounts and removes the dir information.
275 func (d *Driver) Remove(id string) error {
277 rID, err := d.resolveID(id)
282 // This retry loop is due to a bug in Windows (Internal bug #9432268)
283 // if GetContainers fails with ErrVmcomputeOperationInvalidState
284 // it is a transient error. Retry until it succeeds.
285 var computeSystems []hcsshim.ContainerProperties
287 osv := system.GetOSVersion()
289 // Get and terminate any template VMs that are currently using the layer.
290 // Note: It is unfortunate that we end up in the graphdrivers Remove() call
291 // for both containers and images, but the logic for template VMs is only
292 // needed for images - specifically we are looking to see if a base layer
293 // is in use by a template VM as a result of having started a Hyper-V
294 // container at some point.
296 // We have a retry loop for ErrVmcomputeOperationInvalidState and
297 // ErrVmcomputeOperationAccessIsDenied as there is a race condition
298 // in RS1 and RS2 builds during enumeration when a silo is going away
299 // for example under it, in HCS. AccessIsDenied added to fix 30278.
301 // TODO @jhowardmsft - For RS3, we can remove the retries. Also consider
302 // using platform APIs (if available) to get this more succinctly. Also
303 // consider enlightening the Remove() interface to have context of why
304 // the remove is being called - that could improve efficiency by not
305 // enumerating compute systems during a remove of a container as it's
307 computeSystems, err = hcsshim.GetContainers(hcsshim.ComputeSystemQuery{})
309 if (osv.Build < 15139) &&
310 ((err == hcsshim.ErrVmcomputeOperationInvalidState) || (err == hcsshim.ErrVmcomputeOperationAccessIsDenied)) {
311 if retryCount >= 500 {
315 time.Sleep(10 * time.Millisecond)
323 for _, computeSystem := range computeSystems {
324 if strings.Contains(computeSystem.RuntimeImagePath, id) && computeSystem.IsRuntimeTemplate {
325 container, err := hcsshim.OpenContainer(computeSystem.ID)
329 defer container.Close()
330 err = container.Terminate()
331 if hcsshim.IsPending(err) {
332 err = container.Wait()
333 } else if hcsshim.IsAlreadyStopped(err) {
343 layerPath := filepath.Join(d.info.HomeDir, rID)
344 tmpID := fmt.Sprintf("%s-removing", rID)
345 tmpLayerPath := filepath.Join(d.info.HomeDir, tmpID)
346 if err := os.Rename(layerPath, tmpLayerPath); err != nil && !os.IsNotExist(err) {
349 if err := hcsshim.DestroyLayer(d.info, tmpID); err != nil {
350 logrus.Errorf("Failed to DestroyLayer %s: %s", id, err)
356 // Get returns the rootfs path for the id. This will mount the dir at its given path.
357 func (d *Driver) Get(id, mountLabel string) (string, error) {
359 logrus.Debugf("WindowsGraphDriver Get() id %s mountLabel %s", id, mountLabel)
362 rID, err := d.resolveID(id)
366 if count := d.ctr.Increment(rID); count > 1 {
367 return d.cache[rID], nil
370 // Getting the layer paths must be done outside of the lock.
371 layerChain, err := d.getLayerChain(rID)
377 if err := hcsshim.ActivateLayer(d.info, rID); err != nil {
381 if err := hcsshim.PrepareLayer(d.info, rID, layerChain); err != nil {
383 if err2 := hcsshim.DeactivateLayer(d.info, rID); err2 != nil {
384 logrus.Warnf("Failed to Deactivate %s: %s", id, err)
389 mountPath, err := hcsshim.GetLayerMountPath(d.info, rID)
392 if err := hcsshim.UnprepareLayer(d.info, rID); err != nil {
393 logrus.Warnf("Failed to Unprepare %s: %s", id, err)
395 if err2 := hcsshim.DeactivateLayer(d.info, rID); err2 != nil {
396 logrus.Warnf("Failed to Deactivate %s: %s", id, err)
401 d.cache[rID] = mountPath
404 // If the layer has a mount path, use that. Otherwise, use the
415 // Put releases a reference to the layer and, once none remain, unprepares and deactivates it.
416 func (d *Driver) Put(id string) error {
418 logrus.Debugf("WindowsGraphDriver Put() id %s", id)
420 rID, err := d.resolveID(id)
424 if count := d.ctr.Decrement(rID); count > 0 {
428 _, exists := d.cache[rID]
432 // If the cache was not populated, then the layer was left unprepared and deactivated
437 if err := hcsshim.UnprepareLayer(d.info, rID); err != nil {
440 return hcsshim.DeactivateLayer(d.info, rID)
443 // Cleanup ensures the information the driver stores is properly removed.
444 // We use this opportunity to cleanup any -removing folders which may be
445 // still left if the daemon was killed while it was removing a layer.
446 func (d *Driver) Cleanup() error {
447 items, err := ioutil.ReadDir(d.info.HomeDir)
449 if os.IsNotExist(err) {
455 // Note we don't return an error below - it's possible the files
456 // are locked. However, next time around after the daemon exits,
457 // we likely will be able to clean up successfully. Instead we log
458 // warnings if there are errors.
459 for _, item := range items {
460 if item.IsDir() && strings.HasSuffix(item.Name(), "-removing") {
461 if err := hcsshim.DestroyLayer(d.info, item.Name()); err != nil {
462 logrus.Warnf("Failed to cleanup %s: %s", item.Name(), err)
464 logrus.Infof("Cleaned up %s", item.Name())
472 // Diff produces an archive of the changes between the specified
473 // layer and its parent layer which may be "".
474 // The layer should be mounted when calling this function
475 func (d *Driver) Diff(id, parent string) (_ io.ReadCloser, err error) {
477 rID, err := d.resolveID(id)
482 layerChain, err := d.getLayerChain(rID)
487 // this is assuming that the layer is unmounted
488 if err := hcsshim.UnprepareLayer(d.info, rID); err != nil {
492 if err := hcsshim.PrepareLayer(d.info, rID, layerChain); err != nil {
493 logrus.Warnf("Failed to Deactivate %s: %s", rID, err)
497 arch, err := d.exportLayer(rID, layerChain)
502 return ioutils.NewReadCloserWrapper(arch, func() error {
509 // Changes produces a list of changes between the specified layer
510 // and its parent layer. If parent is "", then all changes will be ADD changes.
511 // The layer should not be mounted when calling this function.
512 func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
514 rID, err := d.resolveID(id)
518 parentChain, err := d.getLayerChain(rID)
523 if err := hcsshim.ActivateLayer(d.info, rID); err != nil {
527 if err2 := hcsshim.DeactivateLayer(d.info, rID); err2 != nil {
528 logrus.Errorf("changes() failed to DeactivateLayer %s %s: %s", id, rID, err2)
532 var changes []archive.Change
533 err = winio.RunWithPrivilege(winio.SeBackupPrivilege, func() error {
534 r, err := hcsshim.NewLayerReader(d.info, id, parentChain)
541 name, _, fileInfo, err := r.Next()
548 name = filepath.ToSlash(name)
550 changes = append(changes, archive.Change{Path: name, Kind: archive.ChangeDelete})
552 // Currently there is no way to tell between an add and a modify.
553 changes = append(changes, archive.Change{Path: name, Kind: archive.ChangeModify})
564 // ApplyDiff extracts the changeset from the given diff into the
565 // layer with the specified id and parent, returning the size of the
566 // new layer in bytes.
567 // The layer should not be mounted when calling this function
568 func (d *Driver) ApplyDiff(id, parent string, diff io.Reader) (int64, error) {
570 var layerChain []string
572 rPId, err := d.resolveID(parent)
576 parentChain, err := d.getLayerChain(rPId)
580 parentPath, err := hcsshim.GetLayerMountPath(d.info, rPId)
584 layerChain = append(layerChain, parentPath)
585 layerChain = append(layerChain, parentChain...)
588 size, err := d.importLayer(id, diff, layerChain)
593 if err = d.setLayerChain(id, layerChain); err != nil {
600 // DiffSize calculates the changes between the specified layer
601 // and its parent and returns the size in bytes of the changes
602 // relative to its base filesystem directory.
603 func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
605 rPId, err := d.resolveID(parent)
610 changes, err := d.Changes(id, rPId)
615 layerFs, err := d.Get(id, "")
621 return archive.ChangesSize(layerFs, changes), nil
624 // GetMetadata returns custom driver information.
625 func (d *Driver) GetMetadata(id string) (map[string]string, error) {
627 m := make(map[string]string)
632 func writeTarFromLayer(r hcsshim.LayerReader, w io.Writer) error {
633 t := tar.NewWriter(w)
635 name, size, fileInfo, err := r.Next()
643 // Write a whiteout file.
645 Name: filepath.ToSlash(filepath.Join(filepath.Dir(name), archive.WhiteoutPrefix+filepath.Base(name))),
647 err := t.WriteHeader(hdr)
652 err = backuptar.WriteTarFileFromBackupStream(t, r, name, size, fileInfo)
661 // exportLayer generates an archive from a layer based on the given ID.
662 func (d *Driver) exportLayer(id string, parentLayerPaths []string) (io.ReadCloser, error) {
663 archive, w := io.Pipe()
665 err := winio.RunWithPrivilege(winio.SeBackupPrivilege, func() error {
666 r, err := hcsshim.NewLayerReader(d.info, id, parentLayerPaths)
671 err = writeTarFromLayer(r, w)
678 w.CloseWithError(err)
684 // writeBackupStreamFromTarAndSaveMutatedFiles reads data from a tar stream and
685 // writes it to a backup stream, and also saves any files that will be mutated
686 // by the import layer process to a backup location.
687 func writeBackupStreamFromTarAndSaveMutatedFiles(buf *bufio.Writer, w io.Writer, t *tar.Reader, hdr *tar.Header, root string) (nextHdr *tar.Header, err error) {
688 var bcdBackup *os.File
689 var bcdBackupWriter *winio.BackupFileWriter
690 if backupPath, ok := mutatedFiles[hdr.Name]; ok {
691 bcdBackup, err = os.Create(filepath.Join(root, backupPath))
696 cerr := bcdBackup.Close()
702 bcdBackupWriter = winio.NewBackupFileWriter(bcdBackup, false)
704 cerr := bcdBackupWriter.Close()
710 buf.Reset(io.MultiWriter(w, bcdBackupWriter))
722 return backuptar.WriteBackupStreamFromTarFile(buf, t, hdr)
725 func writeLayerFromTar(r io.Reader, w hcsshim.LayerWriter, root string) (int64, error) {
726 t := tar.NewReader(r)
728 totalSize := int64(0)
729 buf := bufio.NewWriter(nil)
731 base := path.Base(hdr.Name)
732 if strings.HasPrefix(base, archive.WhiteoutPrefix) {
733 name := path.Join(path.Dir(hdr.Name), base[len(archive.WhiteoutPrefix):])
734 err = w.Remove(filepath.FromSlash(name))
739 } else if hdr.Typeflag == tar.TypeLink {
740 err = w.AddLink(filepath.FromSlash(hdr.Name), filepath.FromSlash(hdr.Linkname))
749 fileInfo *winio.FileBasicInfo
751 name, size, fileInfo, err = backuptar.FileInfoFromHeader(hdr)
755 err = w.Add(filepath.FromSlash(name), fileInfo)
759 hdr, err = writeBackupStreamFromTarAndSaveMutatedFiles(buf, w, t, hdr, root)
766 return totalSize, nil
769 // importLayer adds a new layer to the tag and graph store based on the given data.
770 func (d *Driver) importLayer(id string, layerData io.Reader, parentLayerPaths []string) (size int64, err error) {
772 cmd := reexec.Command(append([]string{"docker-windows-write-layer", d.info.HomeDir, id}, parentLayerPaths...)...)
773 output := bytes.NewBuffer(nil)
774 cmd.Stdin = layerData
778 if err = cmd.Start(); err != nil {
782 if err = cmd.Wait(); err != nil {
783 return 0, fmt.Errorf("re-exec error: %v: output: %s", err, output)
786 return strconv.ParseInt(output.String(), 10, 64)
788 return writeLayer(layerData, d.info.HomeDir, id, parentLayerPaths...)
791 // writeLayerReexec is the re-exec entry point for writing a layer from a tar file
792 func writeLayerReexec() {
793 size, err := writeLayer(os.Stdin, os.Args[1], os.Args[2], os.Args[3:]...)
795 fmt.Fprint(os.Stderr, err)
798 fmt.Fprint(os.Stdout, size)
801 // writeLayer writes a layer from a tar file.
802 func writeLayer(layerData io.Reader, home string, id string, parentLayerPaths ...string) (int64, error) {
803 err := winio.EnableProcessPrivileges([]string{winio.SeBackupPrivilege, winio.SeRestorePrivilege})
809 if err := winio.DisableProcessPrivileges([]string{winio.SeBackupPrivilege, winio.SeRestorePrivilege}); err != nil {
810 // This should never happen, but just in case when in debugging mode.
811 // See https://github.com/docker/docker/pull/28002#discussion_r86259241 for rationale.
812 panic("Failed to disabled process privileges while in non re-exec mode")
817 info := hcsshim.DriverInfo{
818 Flavour: filterDriver,
822 w, err := hcsshim.NewLayerWriter(info, id, parentLayerPaths)
827 size, err := writeLayerFromTar(layerData, w, filepath.Join(home, id))
840 // resolveID computes the layerID information based on the given id.
841 func (d *Driver) resolveID(id string) (string, error) {
842 content, err := ioutil.ReadFile(filepath.Join(d.dir(id), "layerID"))
843 if os.IsNotExist(err) {
845 } else if err != nil {
848 return string(content), nil
851 // setID stores the layerId in disk.
852 func (d *Driver) setID(id, altID string) error {
853 return ioutil.WriteFile(filepath.Join(d.dir(id), "layerId"), []byte(altID), 0600)
856 // getLayerChain returns the layer chain information.
857 func (d *Driver) getLayerChain(id string) ([]string, error) {
858 jPath := filepath.Join(d.dir(id), "layerchain.json")
859 content, err := ioutil.ReadFile(jPath)
860 if os.IsNotExist(err) {
862 } else if err != nil {
863 return nil, fmt.Errorf("Unable to read layerchain file - %s", err)
866 var layerChain []string
867 err = json.Unmarshal(content, &layerChain)
869 return nil, fmt.Errorf("Failed to unmarshall layerchain json - %s", err)
872 return layerChain, nil
875 // setLayerChain stores the layer chain information on disk.
876 func (d *Driver) setLayerChain(id string, chain []string) error {
877 content, err := json.Marshal(&chain)
879 return fmt.Errorf("Failed to marshall layerchain json - %s", err)
882 jPath := filepath.Join(d.dir(id), "layerchain.json")
883 err = ioutil.WriteFile(jPath, content, 0600)
885 return fmt.Errorf("Unable to write layerchain file - %s", err)
891 type fileGetCloserWithBackupPrivileges struct {
895 func (fg *fileGetCloserWithBackupPrivileges) Get(filename string) (io.ReadCloser, error) {
896 if backupPath, ok := mutatedFiles[filename]; ok {
897 return os.Open(filepath.Join(fg.path, backupPath))
901 // Open the file while holding the Windows backup privilege. This ensures that the
902 // file can be opened even if the caller does not actually have access to it according
903 // to the security descriptor. Also use sequential file access to avoid depleting the
904 // standby list - Microsoft VSO Bug Tracker #9900466
905 err := winio.RunWithPrivilege(winio.SeBackupPrivilege, func() error {
906 path := longpath.AddPrefix(filepath.Join(fg.path, filename))
907 p, err := syscall.UTF16FromString(path)
911 const fileFlagSequentialScan = 0x08000000 // FILE_FLAG_SEQUENTIAL_SCAN
912 h, err := syscall.CreateFile(&p[0], syscall.GENERIC_READ, syscall.FILE_SHARE_READ, nil, syscall.OPEN_EXISTING, syscall.FILE_FLAG_BACKUP_SEMANTICS|fileFlagSequentialScan, 0)
914 return &os.PathError{Op: "open", Path: path, Err: err}
916 f = os.NewFile(uintptr(h), path)
922 func (fg *fileGetCloserWithBackupPrivileges) Close() error {
926 // DiffGetter returns a FileGetCloser that can read files from the directory that
927 // contains files for the layer differences. Used for direct access for tar-split.
928 func (d *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) {
930 id, err := d.resolveID(id)
935 return &fileGetCloserWithBackupPrivileges{d.dir(id)}, nil
938 type storageOptions struct {
942 func parseStorageOpt(storageOpt map[string]string) (*storageOptions, error) {
943 options := storageOptions{}
945 // Read size to change the block device size per container.
946 for key, val := range storageOpt {
947 key := strings.ToLower(key)
950 size, err := units.RAMInBytes(val)
954 options.size = uint64(size)
956 return nil, fmt.Errorf("Unknown storage option: %s", key)