Skip to content

Commit b7900ef

Browse files
committed
Update to use bookworm base image
The bookworm image includes symlinks from /bin -> /usr/bin (and /lib, and others), which broke the build script. See the comments in the script for details.
1 parent 0753bd5 commit b7900ef

File tree

4 files changed

+182
-68
lines changed

4 files changed

+182
-68
lines changed

Dockerfile.in

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,15 @@ RUN /stage_binaries.sh -o {ARG_STAGING} \
6363
-b /bin/sed
6464
RUN ln -sf /bin/dash {ARG_STAGING}/bin/sh
6565

66-
COPY clean_distroless.sh /clean_distroless.sh
67-
RUN /clean_distroless.sh {ARG_STAGING}
68-
6966
# We need to use distroless/base for tzdata, glibc, and some others.
7067
FROM gcr.io/distroless/base as intermediate
7168

7269
# Docker doesn't do vars in COPY, so we can't use a regular ARG.
7370
COPY --from=base {ARG_STAGING} /
7471

72+
COPY clean_distroless.sh /clean_distroless.sh
73+
RUN /clean_distroless.sh
74+
7575
# Add the default UID to /etc/passwd so SSH is satisfied.
7676
RUN echo "git-sync:x:65533:65533::/tmp:/sbin/nologin" >> /etc/passwd
7777
# A user might choose a different UID and set the `--add-user` flag, which

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ ALL_PLATFORMS := linux/amd64 linux/arm linux/arm64 linux/ppc64le linux/s390x
4747
OS := $(if $(GOOS),$(GOOS),$(shell go env GOOS))
4848
ARCH := $(if $(GOARCH),$(GOARCH),$(shell go env GOARCH))
4949

50-
BASEIMAGE ?= registry.k8s.io/build-image/debian-base:bullseye-v1.4.3
50+
BASEIMAGE ?= registry.k8s.io/build-image/debian-base:bookworm-v1.0.0
5151

5252
IMAGE := $(REGISTRY)/$(BIN)
5353
TAG := $(VERSION)

clean_distroless.sh

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,38 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17-
# USAGE: clean-distroless.sh <staging_dir>
18-
19-
if [ -z "$1" ]; then
20-
echo "usage: $0 <staging-dir>"
21-
exit 1
22-
fi
23-
ROOT="$1"
24-
2517
# This script needs to be "sh" and not "bash", but there are no arrays in sh,
# except for "$@".  We need array semantics on the off chance we ever have a
# pathname with spaces in it.
#
# This list is not generic - it is specific to git-sync on debian bookworm.
#
# Each path appears exactly once.  The final entry is this script itself and
# must match the destination used by the Dockerfile's
# `COPY clean_distroless.sh /clean_distroless.sh`.
set -- \
    /usr/share/base-files \
    /usr/share/doc \
    /usr/share/man \
    /usr/lib/*-linux-gnu/gconv \
    /usr/bin/c_rehash \
    /usr/bin/git-shell \
    /usr/bin/openssl \
    /usr/bin/scalar \
    /usr/bin/scp \
    /usr/bin/sftp \
    /usr/bin/ssh-add \
    /usr/bin/ssh-agent \
    /usr/bin/ssh-keygen \
    /usr/bin/ssh-keyscan \
    /usr/lib/git-core/git-shell \
    /usr/lib/git-core/git-daemon \
    /usr/lib/git-core/git-http-backend \
    /usr/lib/git-core/git-http-fetch \
    /usr/lib/git-core/git-http-push \
    /usr/lib/git-core/git-imap-send \
    /usr/lib/git-core/git-sh-i18n--envsubst \
    /usr/lib/openssh/ssh-keysign \
    /usr/lib/openssh/ssh-pkcs11-helper \
    /usr/lib/openssh/ssh-sk-helper \
    /usr/share/gitweb \
    /clean_distroless.sh
3650

3751
for item; do

stage_binaries.sh

Lines changed: 155 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -48,53 +48,128 @@ trap 'errexit' ERR
4848
# expansions and subshells
4949
set -o errtrace
5050

51+
# Track these globally so we only load them once.
52+
ROOT_FWD_LINKS=()
53+
ROOT_REV_LINKS=()
54+
55+
# load_root_links records every symlink that lives directly under "/" (for
# example /bin -> /usr/bin) in two parallel global arrays: ROOT_FWD_LINKS gets
# the link path and ROOT_REV_LINKS gets its fully resolved target, at the same
# index.
#
# Arguments: $1 - staging directory; accepted for call compatibility, unused.
# Globals:   appends to ROOT_FWD_LINKS and ROOT_REV_LINKS.
function load_root_links() {
    local link
    local target

    # Glob rather than parse `ls` output; like `ls /`, `/*` skips dotfiles.
    for link in /*; do
        if [[ -L "${link}" ]]; then
            # Resolve first, then append both, so the arrays stay in lockstep.
            target="$(realpath "${link}")"
            ROOT_FWD_LINKS+=("${link}")
            ROOT_REV_LINKS+=("${target}")
        fi
    done
}
65+
5166
# file_to_package identifies the debian package that provided the file $1
#
# Arguments: $1 - path of an installed file.
# Globals:   reads ROOT_FWD_LINKS and ROOT_REV_LINKS (parallel arrays).
# Outputs:   the text before the first ':' of each dpkg-query result line,
#            i.e. the package name, on stdout.
# Returns:   non-zero when dpkg-query matches neither spelling of the path.
function file_to_package() {
    local file="$1"

    # Newer versions of debian symlink /lib -> /usr/lib (and others), but dpkg
    # has some files in its DB as "/lib/<whatever>" and others as
    # "/usr/lib/<whatever>".  This causes havoc trying to identify the package
    # for a library discovered via ldd.
    #
    # So, to combat this we build a "map" of root links, and their targets, and
    # try to search for both paths.

    local alt=""
    local fwd
    local rev
    local i
    for (( i = 0; i < ${#ROOT_FWD_LINKS[@]}; i++ )); do
        fwd="${ROOT_FWD_LINKS[i]}"
        rev="${ROOT_REV_LINKS[i]}"
        # Swap the matching prefix with plain string operations, so characters
        # in the path are never interpreted as a regex.
        if [[ "${file}" == "${fwd}/"* ]]; then
            alt="${rev}${file#"${fwd}"}"
            break
        elif [[ "${file}" == "${rev}/"* ]]; then
            alt="${fwd}${file#"${rev}"}"
            break
        fi
    done

    # Test the lookups directly in the `if`: an expected miss on the first
    # spelling must not trip errexit or the ERR trap before we try the second.
    local out=""
    local result=""
    if out="$(dpkg-query --search "${file}" 2>&1)"; then
        result="${out}"
    elif [[ -n "${alt}" ]] && out="$(dpkg-query --search "${alt}" 2>&1)"; then
        result="${out}"
    fi

    # If we found no match, let it error out.
    if [[ -z "${result}" ]]; then
        dpkg-query --search "${file}"
        return 1
    fi

    # `dpkg-query --search $file-pattern` outputs lines with the format: "$package: $file-path"
    # where $file-path belongs to $package.
    cut -d':' -f1 <<< "${result}"
}
60115

61-
# package_to_copyright gives the path to the copyright file for the package $1
62-
function package_to_copyright() {
63-
local pkg="$1"
64-
echo "/usr/share/doc/${pkg}/copyright"
116+
# ensure_dir_in_staging makes sure directory $2 exists beneath staging root $1.
# When the path is missing, a tar archive holding just that directory entry
# (--no-recursion, with symlinks followed via --dereference) is created
# relative to "/" and unpacked into the staging root.
#
# Arguments: $1 - staging root directory
#            $2 - absolute directory path to create inside staging
function ensure_dir_in_staging() {
    local staging="$1"
    local dir="$2"

    # Nothing to do when the path is already present in staging.
    if [[ -e "${staging}/${dir}" ]]; then
        return 0
    fi

    # tar runs with -C /, so the member name must not start with a slash.
    local rel="${dir#/}"
    tar -C / -c --no-recursion --dereference "${rel}" | tar -C "${staging}" -x
}
66125

67-
# stage_file stages the filepath $1 to $2, following symlinks
126+
# stage_file stages the filepath $2 to $1, following symlinks
# and staging copyrights
#
# Arguments: $1 - staging root directory
#            $2 - absolute path of the file to stage
#
# The fully resolved path is what gets copied; if $2 differs from it (because
# $2 or one of its parent directories is a symlink), a symlink named $2 that
# points at the resolved path is created in staging.  Executables that are
# not reached through a symlink also get their libraries (as reported by
# binary_to_libraries) staged recursively.  Finally the owning package's
# copyright (gzipped) and dpkg status are written into staging.
function stage_file() {
    local staging="$1"
    local file="$2"

    # short circuit if we have done this file before
    if [[ -e "${staging}/${file}" ]]; then
        return
    fi

    # copy the real form of the named path
    # (declared separately so a realpath failure is not masked by `local`)
    local real
    real="$(realpath "${file}")" || return 1
    cp -a --parents "${real}" "${staging}"

    # recreate symlinks, even on intermediate path elements
    if [[ "${file}" != "${real}" ]]; then
        # copying the real path may already have made ${file} appear
        if [[ ! -e "${staging}/${file}" ]]; then
            local dir
            dir="$(dirname "${file}")"
            ensure_dir_in_staging "${staging}" "${dir}"
            ln -s "${real}" "${staging}/${file}"
        fi
    elif [[ -x "${file}" ]]; then
        # stage the dependencies of the binary
        binary_to_libraries "${file}" \
            | while read -r lib; do
                stage_file "${staging}" "${lib}"
            done
    fi

    # get the package so we can stage package metadata as well
    local package
    package="$(file_to_package "${file}")"
    # stage the copyright for the file, if it exists
    local copyright_src="/usr/share/doc/${package}/copyright"
    local copyright_dst="${staging}/copyright/${package}/copyright.gz"
    if [[ -f "${copyright_src}" && ! -f "${copyright_dst}" ]]; then
        mkdir -p "$(dirname "${copyright_dst}")"
        gzip -9 --to-stdout "${copyright_src}" > "${copyright_dst}"
    fi

    # stage the package status mimicking bazel
    # https://github.com/bazelbuild/rules_docker/commit/f5432b813e0a11491cf2bf83ff1a923706b36420
    # instead of parsing the control file, we can just get the actual package status with dpkg
    mkdir -p "${staging}/var/lib/dpkg/status.d/"
    dpkg -s "${package}" > "${staging}/var/lib/dpkg/status.d/${package}"
}
99174

100175
function grep_allow_nomatch() {
@@ -115,23 +190,37 @@ function indent() {
115190
{ "$@" 2>&1 1>&3 | _indent; } 3>&1 1>&2 | _indent
116191
}
117192

118-
function stage_file_list() {
119-
local pkg="$1"
120-
local staging="$2"
121-
122-
dpkg -L "${pkg}" \
123-
| grep_allow_nomatch -vE '(/\.|/usr/share/(man|doc|.*-completion))' \
124-
| while read -r file; do
125-
if [[ -f "$file" ]]; then
126-
stage_file "${file}" "${staging}"
127-
if [[ -L "$file" ]]; then
128-
continue
129-
fi
130-
if [[ -x "$file" ]]; then
131-
stage_binaries "${staging}" "${file}"
132-
fi
193+
# stage_one_package stages every regular file (or symlink to one) that
# `dpkg -L` lists for package $2 into the staging root $1, skipping hidden
# paths, man pages, docs and shell completions.
#
# Files inside the package whose contents are byte-identical (same md5sum) are
# staged only once; each later duplicate becomes a symlink in staging that
# points at the first path seen with that checksum.
#
# Arguments: $1 - staging root directory
#            $2 - debian package name
function stage_one_package() {
    local staging="$1"
    local pkg="$2"

    # Maps checksum -> first path staged with that content.
    local -A first_by_sum=()
    local file
    local sum
    local original
    while read -r file; do
        if [[ ! -f "${file}" ]]; then
            continue
        fi

        # Only real files are checksummed.  Symlinks are always handed to
        # stage_file and never recorded, so no stale or unset checksum can
        # end up in the table.
        sum=""
        if [[ ! -L "${file}" ]]; then
            sum="$(md5sum "${file}" | cut -f1 -d' ')"
        fi

        original=""
        if [[ -n "${sum}" ]]; then
            original="${first_by_sum[${sum}]:-}"
        fi

        if [[ -z "${original}" ]]; then
            if [[ -n "${sum}" ]]; then
                first_by_sum["${sum}"]="${file}"
            fi
            stage_file "${staging}" "${file}"
        elif [[ ! -e "${staging}/${file}" && ! -L "${staging}/${file}" ]]; then
            # Duplicate content: link to the first copy, unless this path is
            # already in staging (e.g. the package was staged before).
            ensure_dir_in_staging "${staging}" "$(dirname "${file}")"
            ln -s "${original}" "${staging}/${file}"
        fi
    done < <( dpkg -L "${pkg}" \
        | grep_allow_nomatch -vE '(/\.|/usr/share/(man|doc|.*-completion))' )
}
136225

137226
function get_dependent_packages() {
@@ -148,18 +237,20 @@ function stage_packages() {
148237
local staging="$1"
149238
shift
150239

151-
mkdir -p "${staging}"/var/lib/dpkg/status.d/
152240
indent apt-get -y -qq -o Dpkg::Use-Pty=0 update
153241

154242
local pkg
155243
for pkg; do
156244
echo "staging package ${pkg}"
245+
local du_before="$(du -sk "${staging}" | cut -f1)"
157246
indent apt-get -y -qq -o Dpkg::Use-Pty=0 --no-install-recommends install "${pkg}"
158-
stage_file_list "${pkg}" "$staging"
247+
stage_one_package "$staging" "${pkg}"
159248
get_dependent_packages "${pkg}" \
160249
| while read -r dep; do
161-
stage_file_list "${dep}" "${staging}"
250+
stage_one_package "${staging}" "${dep}"
162251
done
252+
local du_after="$(du -sk "${staging}" | cut -f1)"
253+
echo "package ${pkg} size: +$(( $du_after - $du_before )) kB (of ${du_after} kB)"
163254
done
164255
}
165256

@@ -191,29 +282,29 @@ function binary_to_libraries() {
191282
| awk '{print $1}'
192283
}
193284

285+
# stage_one_binary resolves binary $2 (a command name looked up on PATH, or a
# path to an executable) and stages it into staging root $1 via stage_file.
#
# Arguments: $1 - staging root directory
#            $2 - binary name or path
# Returns:   1 (with a message on stderr) if the binary cannot be found.
function stage_one_binary() {
    local staging="$1"
    local bin="$2"

    # locate the path to the binary; `type -P` is a bash builtin that only
    # searches PATH, so no external `which` is needed
    local binary_path
    if ! binary_path="$(type -P "${bin}")"; then
        echo "ERROR: cannot find binary '${bin}'" >&2
        return 1
    fi

    # stage the binary itself
    stage_file "${staging}" "${binary_path}"
}
296+
194297
# stage_binaries stages each named binary into the staging root and reports
# how much the staging directory grew for each one.
#
# Arguments: $1    - staging root directory
#            $2... - binaries to stage (passed one at a time to
#                    stage_one_binary)
# Outputs:   progress and size lines on stdout.
function stage_binaries() {
    local staging="$1"
    shift

    local bin
    # Declared once, and assigned separately from `local`, so a failing `du`
    # is not masked by the declaration's own exit status.
    local du_before
    local du_after
    for bin; do
        echo "staging binary ${bin}"
        du_before="$(du -sk "${staging}" | cut -f1)"
        stage_one_binary "${staging}" "${bin}"
        du_after="$(du -sk "${staging}" | cut -f1)"
        echo "binary ${bin} size: +$(( du_after - du_before )) kB (of ${du_after} kB)"
    done
}
219310

@@ -272,12 +363,21 @@ function main() {
272363
exit 4
273364
fi
274365

366+
# Newer versions of debian symlink /bin -> /usr/bin (and lib, and others).
367+
# The somewhat naive copying done in this program does not retain that,
368+
# which causes some files to be duplicated. Fortunately, these are all in
369+
# the root dir, or we might have to do something more complicated.
370+
load_root_links "${staging}"
371+
275372
if (( "${#pkgs[@]}" > 0 )); then
276373
stage_packages "${staging}" "${pkgs[@]}"
277374
fi
278375
if (( "${#bins[@]}" > 0 )); then
279376
stage_binaries "${staging}" "${bins[@]}"
280377
fi
378+
379+
echo "final staged size: $(du -sk "${staging}" | cut -f1) kB"
380+
du -xk --max-depth=3 "${staging}" | sort -n | _indent
281381
}
282382

283383
main "$@"

0 commit comments

Comments
 (0)