Skip to content

Commit bd05f55

Browse files
committed
Unpack TF plugins in a more atomic way.
The original implementation of plugin cache handling unpacks plugin archives in a way that can result in race conditions when multiple terraform processes are accessing the same plugin cache. Since the archives are being decompressed in chunks and output files are written directly to the cache, we observed following manifestations of the issue: - `text file busy` errors if other terraform processes are already running the plugin and another process tries to replace it. - various plugin checksum errors triggered likely by simultaneous checksumming and rewriting the file. This PR changes the zip archives with plugins are handled: 1. Instead of writing directly to the target directory, `installFromLocalArchive` is now writing to a temporary staging directory prefixed with`.temp` that resides in the plugin cache dir to ensure this is on the same filesystem. 2. After unpacking, the directory structure of the staging directory is replicated in the `targetDir`. The directories are created as needed and any files are moved using `os.Rename`. After this, the staging directory is removed. 3. Since the temporary staging directories can be picked up by `SearchLocalDirectory` and make it fail in the situation when they're removed during directory traversal, the function has been modified to skip any entry that starts with dot. Signed-off-by: Milan Plzik <milan.plzik@grafana.com>
1 parent b4a634c commit bd05f55

File tree

3 files changed

+64
-10
lines changed

3 files changed

+64
-10
lines changed

internal/getproviders/filesystem_search.go

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,25 @@ func SearchLocalDirectory(baseDir string) (map[addrs.Provider]PackageMetaList, e
4545
log.Printf("[TRACE] getproviders.SearchLocalDirectory: failed to resolve symlinks for %s: %s", baseDir, err)
4646
}
4747

48-
err := filepath.Walk(baseDir, func(fullPath string, info os.FileInfo, err error) error {
48+
err := filepath.Walk(baseDir, func(fullPath string, info os.FileInfo, werr error) error {
49+
fsPath, err := filepath.Rel(baseDir, fullPath)
4950
if err != nil {
51+
// This should never happen because the filepath.Walk contract is
52+
// for the paths to include the base path.
53+
log.Printf("[TRACE] getproviders.SearchLocalDirectory: ignoring malformed path %q during walk: %s", fullPath, err)
54+
return nil
55+
}
56+
57+
if filepath.Base(fsPath)[0] == '.' && fsPath != "." {
58+
// Dot-files should not occur in the provider cache at all.
59+
// Dot-directories are used by e.g. temporary directories to
60+
// unpack the packed providers. These might disappear at any
61+
// moment, making the traversal fairly brittle. Just skip them.
62+
log.Printf("[TRACE] getproviders.SearchLocalDirectory: skipping dotfile %s", fsPath)
63+
return filepath.SkipDir
64+
}
65+
66+
if werr != nil {
5067
return fmt.Errorf("cannot search %s: %s", fullPath, err)
5168
}
5269

@@ -56,13 +73,7 @@ func SearchLocalDirectory(baseDir string) (map[addrs.Provider]PackageMetaList, e
5673
//
5774
// Both of these give us enough information to identify the package
5875
// metadata.
59-
fsPath, err := filepath.Rel(baseDir, fullPath)
60-
if err != nil {
61-
// This should never happen because the filepath.Walk contract is
62-
// for the paths to include the base path.
63-
log.Printf("[TRACE] getproviders.SearchLocalDirectory: ignoring malformed path %q during walk: %s", fullPath, err)
64-
return nil
65-
}
76+
6677
relPath := filepath.ToSlash(fsPath)
6778
parts := strings.Split(relPath, "/")
6879

internal/providercache/installer_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2299,7 +2299,7 @@ func TestEnsureProviderVersions_local_source(t *testing.T) {
22992299
provider: "null",
23002300
version: "2.1.0",
23012301
wantHash: getproviders.NilHash,
2302-
err: "zip: not a valid zip file",
2302+
err: "failed to decompress: zip: not a valid zip file",
23032303
},
23042304
"version-constraint-unmet": {
23052305
provider: "null",

internal/providercache/package_install.go

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@ package providercache
66
import (
77
"context"
88
"fmt"
9+
"io/fs"
910
"net/url"
1011
"os"
12+
"path"
1113
"path/filepath"
1214

1315
getter "github.com/hashicorp/go-getter"
@@ -122,7 +124,48 @@ func installFromLocalArchive(ctx context.Context, meta getproviders.PackageMeta,
122124
// match the allowed hashes and so our caller should catch that after
123125
// we return if so.
124126

125-
err := unzip.Decompress(targetDir, filename, true, 0000)
127+
err := os.MkdirAll(targetDir, 0777)
128+
if err != nil {
129+
return authResult, fmt.Errorf("failed to create new directory: %w", err)
130+
}
131+
132+
stagingDir, err := os.MkdirTemp(path.Dir(targetDir), ".temp")
133+
if err != nil {
134+
return authResult, fmt.Errorf("failed to create a temp dir: %w", err)
135+
}
136+
defer os.RemoveAll(stagingDir)
137+
138+
err = unzip.Decompress(stagingDir, filename, true, 0000)
139+
if err != nil {
140+
return authResult, fmt.Errorf("failed to decompress: %w", err)
141+
}
142+
143+
// Try to atomically move the files from stagingDir to targetDir.
144+
err = filepath.Walk(stagingDir, func(path string, info fs.FileInfo, err error) error {
145+
if err != nil {
146+
return fmt.Errorf("failed to copy path %s to target directory: %w", path, err)
147+
}
148+
relPath, err := filepath.Rel(stagingDir, path)
149+
if err != nil {
150+
return fmt.Errorf("failed to calculate relative path: %w", err)
151+
}
152+
153+
if info.IsDir() {
154+
// Create the directory
155+
err := os.MkdirAll(filepath.Join(targetDir, relPath), info.Mode().Perm())
156+
if err != nil {
157+
return fmt.Errorf("failed to create path: %w", err)
158+
}
159+
} else {
160+
// On supported platforms, this should perform atomic replacement of the file.
161+
err := os.Rename(path, filepath.Join(targetDir, relPath))
162+
if err != nil {
163+
return fmt.Errorf("failed to move '%s': %w", path, err)
164+
}
165+
}
166+
return nil
167+
})
168+
126169
if err != nil {
127170
return authResult, err
128171
}

0 commit comments

Comments
 (0)