Skip to content

Commit b60eef6

Browse files
authored
Merge pull request #753 from thockin/v4_smaller_image
Make the container image smaller
2 parents 1894192 + f037087 commit b60eef6

File tree

4 files changed

+344
-12
lines changed

4 files changed

+344
-12
lines changed

Dockerfile.in

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,22 +41,36 @@
4141
#############################################################################
4242
# First we prepare the image that we want, regardless of build layers.
4343
#############################################################################
44-
FROM {ARG_FROM} as prep
44+
FROM {ARG_FROM} as base
4545

4646
# When building, we can pass a unique value (e.g. `date +%s`) for this arg,
4747
# which will force a rebuild from here (by invalidating docker's cache).
4848
ARG FORCE_REBUILD=0
4949

50-
RUN apt-get -q -y update
51-
RUN apt-get -q -y upgrade
52-
RUN apt-get -q -y install --no-install-recommends \
53-
ca-certificates \
54-
coreutils \
55-
socat \
56-
openssh-client \
57-
git
58-
RUN apt-get -q -y autoremove
59-
RUN rm -rf /var/lib/apt/lists/*
50+
RUN apt-get -y -qq -o Dpkg::Use-Pty=0 update
51+
RUN apt-get -y -qq -o Dpkg::Use-Pty=0 -y upgrade
52+
53+
RUN apt-get -y -qq -o Dpkg::Use-Pty=0 install bash # for the staging scripts and ldd
54+
RUN mkdir -p {ARG_STAGING}
55+
COPY stage_binaries.sh /
56+
RUN /stage_binaries.sh -o {ARG_STAGING} \
57+
-p coreutils \
58+
-p socat \
59+
-p openssh-client \
60+
-p git \
61+
-b /bin/dash \
62+
-b /bin/grep \
63+
-b /bin/sed
64+
RUN ln -sf /bin/dash {ARG_STAGING}/bin/sh
65+
66+
COPY clean_distroless.sh /clean_distroless.sh
67+
RUN /clean_distroless.sh {ARG_STAGING}
68+
69+
# We need to use distroless/base for tzdata, glibc, and some others.
70+
FROM gcr.io/distroless/base as intermediate
71+
72+
# Docker doesn't do vars in COPY, so we can't use a regular ARG.
73+
COPY --from=base {ARG_STAGING} /
6074

6175
# Add the default UID to /etc/passwd so SSH is satisfied.
6276
RUN echo "git-sync:x:65533:65533::/tmp:/sbin/nologin" >> /etc/passwd
@@ -90,7 +104,7 @@ COPY bin/{ARG_OS}_{ARG_ARCH}/{ARG_BIN} /{ARG_BIN}
90104
# Now we make a "clean" final image.
91105
#############################################################################
92106
FROM scratch
93-
COPY --from=prep / /
107+
COPY --from=intermediate / /
94108

95109
# Run as non-root by default. There's simply no reason to run as root.
96110
USER 65533:65533

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ container: .container-$(DOTFILE_IMAGE) container-name
169169
-e 's|{ARG_ARCH}|$(ARCH)|g' \
170170
-e 's|{ARG_OS}|$(OS)|g' \
171171
-e 's|{ARG_FROM}|$(BASEIMAGE)|g' \
172+
-e 's|{ARG_STAGING}|/staging|g' \
172173
Dockerfile.in > .dockerfile-$(OS)_$(ARCH)
173174
HASH_LICENSES=$$(find $(LICENSES) -type f \
174175
| xargs md5sum | md5sum | cut -f1 -d' '); \

clean_distroless.sh

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/bin/sh
2+
3+
# Copyright 2022 The Kubernetes Authors.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# USAGE: clean_distroless.sh <staging-dir>
18+
19+
# Removes files that the final image does not need from the staging tree.
#
# Args:
#   $1: path to the staging dir whose contents should be cleaned
# Exits 1 (with a usage message on stderr) when no staging dir is given.
if [ -z "${1:-}" ]; then
  echo "usage: $0 <staging-dir>" >&2
  exit 1
fi
ROOT="$1"

# This script needs to be "sh" and not "bash", so there are no arrays to hold
# the list. Every path is passed straight to a single rm instead, with
# "${ROOT}" quoted so a staging dir with spaces in its name still works.
#
# The gconv entry deliberately leaves only the glob part unquoted so that the
# pattern is expanded inside the staging tree, not against the root of the
# image this script happens to run in. An unmatched pattern is passed through
# literally, which `rm -f` ignores.
rm -rf -- \
  "${ROOT}/usr/share/base-files" \
  "${ROOT}/usr/share/man" \
  "${ROOT}"/usr/lib/*-linux-gnu/gconv \
  "${ROOT}/usr/bin/c_rehash" \
  "${ROOT}/usr/bin/openssl" \
  "${ROOT}/iptables-wrapper-installer.sh" \
  "${ROOT}/clean-distroless.sh" \
  "${ROOT}/clean_distroless.sh"

stage_binaries.sh

Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
#!/bin/bash
2+
3+
# Copyright 2022 The Kubernetes Authors.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# USAGE: stage_binaries.sh -o <staging-dir> ( -p <package> | -b <binary> )...
18+
#
19+
# Stages all the packages or files and their dependencies (+ libraries and
20+
# copyrights) to the staging dir.
21+
#
22+
# This is intended to be used in a multi-stage docker build with a distroless/base
23+
# or distroless/cc image.
24+
25+
# Strict mode: stop on any failing command, on any use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
28+
29+
# errexit is the handler that runs when a command fails and the ERR trap
# fires. It prints the function, file and line where the failure happened.
# Borrowed from kubernetes, which was borrowed from
# https://gist.github.com/ahendrix/7030300
function errexit() {
  # When the current shell does not have errexit enabled (common in
  # subshells) the failure is not fatal here, so stay silent.
  [[ "$-" == *e* ]] || return

  local src line caller
  src="$(basename "${BASH_SOURCE[1]}")"
  line="${BASH_LINENO[0]}"
  caller="${FUNCNAME[1]:-}"
  echo "FATAL: error at ${caller}() ${src}:${line}" >&2
}
42+
43+
# errtrace makes the ERR trap below propagate into functions, expansions and
# subshells instead of applying to top-level commands only.
set -o errtrace

# Run the errexit handler whenever a command exits nonzero; this is a more
# verbose version of plain `set -o errexit`.
trap 'errexit' ERR
50+
51+
# file_to_package prints the name of the debian package that provided the
# file $1 (after resolving $1 to its real path).
function file_to_package() {
  local path="$1"

  # Every line printed by `dpkg-query --search <pattern>` has the format
  # "<package>: <file-path>", so the package is whatever precedes the first
  # colon.
  # https://manpages.debian.org/jessie/dpkg/dpkg-query.1.en.html
  dpkg-query --search "$(realpath "${path}")" \
    | cut -d ':' -f 1
}
60+
61+
# package_to_copyright prints the path where the copyright file of the
# package $1 is expected to live. It does not check that the file exists.
function package_to_copyright() {
  printf '/usr/share/doc/%s/copyright\n' "$1"
}
66+
67+
# stage_file stages the filepath $1 into the staging dir $2, keeping its full
# directory structure. Symlinks are followed so their targets are staged too,
# and the owning package's copyright file and dpkg status are staged with it.
#
# Args:
#   $1: path of the file to stage
#   $2: path to staging dir
# Returns:
#   non-zero if the package that owns $1 cannot be determined
function stage_file() {
  local file="$1"
  local staging="$2"

  # copy the named path
  cp -a --parents "${file}" "${staging}"

  # recursively follow symlinks; a relative link target is resolved against
  # the directory that holds the link
  if [[ -L "${file}" ]]; then
    local target
    target="$(cd "$(dirname "${file}")" && realpath -s "$(readlink "${file}")")" \
      || return
    stage_file "${target}" "${staging}"
  fi

  # Get the package so we can stage package metadata as well. Declaring and
  # assigning separately keeps `local` from hiding a failed lookup, which
  # would otherwise surface later as a confusing redirect error.
  local package
  package="$(file_to_package "${file}")" || package=""
  if [[ -z "${package}" ]]; then
    echo "error: cannot determine the package that owns ${file}" >&2
    return 1
  fi

  # stage the copyright for the file, if it exists
  local copyright
  copyright="$(package_to_copyright "${package}")"
  if [[ -f "${copyright}" ]]; then
    cp -a --parents "${copyright}" "${staging}"
  fi

  # stage the package status mimicking bazel
  # https://github.com/bazelbuild/rules_docker/commit/f5432b813e0a11491cf2bf83ff1a923706b36420
  # instead of parsing the control file, we can just get the actual package
  # status with dpkg. The metadata dir is created here so this function does
  # not depend on its caller having prepared it.
  mkdir -p "${staging}/var/lib/dpkg/status.d"
  dpkg -s "${package}" > "${staging}/var/lib/dpkg/status.d/${package}"
}
94+
95+
# grep_allow_nomatch runs grep with the given arguments but reports success
# when nothing matched. Any other grep failure yields exit status 1.
function grep_allow_nomatch() {
  # grep exits 0 on match, 1 on no match, 2 on error
  local rc=0
  grep "$@" || rc=$?
  if (( rc == 0 || rc == 1 )); then
    return 0
  fi
  return 1
}
99+
100+
# _indent copies stdin to stdout, prefixing every line with one space.
function _indent() {
  # Keep the loop variable out of the global namespace.
  local line
  # `IFS=` preserves each line's own leading/trailing whitespace, and the
  # `|| [[ -n ... ]]` clause still emits a last line that has no terminating
  # newline instead of silently dropping it.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    printf ' %s\n' "${line}"
  done
}
105+
106+
# indent runs "$@" and indents everything it prints. stdout and stderr are
# indented separately: indented stdout still arrives on stdout and indented
# stderr still arrives on stderr.
function indent() {
  # This lets us process stderr and stdout without merging them, without
  # bash-isms.
  #
  # Outer pipeline: fd 3 is pointed at the pipe feeding the outer _indent and
  # fd 1 at the caller's stderr. Inner pipeline: the command's stderr goes
  # through the inner _indent (which writes to the caller's stderr), while its
  # stdout is sent via fd 3 to the outer _indent (which writes to the caller's
  # stdout).
  {
    "$@" 2>&1 1>&3 | _indent
  } 3>&1 1>&2 | _indent
}
112+
113+
# stage_file_list stages the files that `dpkg -L` lists for package $1 into
# the staging dir $2. Paths containing "/." and anything under
# /usr/share/man, /usr/share/doc or /usr/share/*-completion are left out.
#
# Args:
#   $1: package name
#   $2: path to staging dir
function stage_file_list() {
  local package="$1"
  local dest="$2"
  local entry

  dpkg -L "${package}" \
    | grep_allow_nomatch -vE '(/\.|/usr/share/(man|doc|.*-completion))' \
    | while read -r entry; do
        # Only files are staged; directories and missing paths are ignored.
        if [[ ! -f "${entry}" ]]; then
          continue
        fi
        stage_file "${entry}" "${dest}"

        # Symlinks are not treated as binaries themselves; for everything else
        # that is executable, stage_binaries pulls in the libraries it needs.
        if [[ ! -L "${entry}" && -x "${entry}" ]]; then
          stage_binaries "${dest}" "${entry}"
        fi
      done
}
131+
132+
# get_dependent_packages prints, one per line, the names that
# `apt-cache depends` reports on its "Depends" lines for package $1 (this
# includes lines such as "PreDepends", since they contain "Depends" too).
function get_dependent_packages() {
  local package="$1"

  # A single awk pass both selects the lines that mention "Depends" and, by
  # using everything up to and including "Depends:" as the field separator,
  # leaves the dependency name in $2.
  apt-cache depends "${package}" \
    | awk -F '.*Depends:[[:space:]]?' '/Depends/ { print $2 }'
}
138+
139+
# stage_packages installs each named package and stages its files, plus the
# files of the packages reported by get_dependent_packages for it.
#
# Args:
#   $1: path to staging dir
#   $2+: package names
function stage_packages() {
  local dest="$1"
  shift

  # Package metadata is written here by stage_file.
  mkdir -p "${dest}"/var/lib/dpkg/status.d/
  indent apt-get -y -qq -o Dpkg::Use-Pty=0 update

  local name dependency
  for name in "$@"; do
    echo "staging package ${name}"
    indent apt-get -y -qq -o Dpkg::Use-Pty=0 --no-install-recommends install "${name}"
    stage_file_list "${name}" "${dest}"

    # Only the direct dependencies are walked; there is no recursion.
    get_dependent_packages "${name}" \
      | while read -r dependency; do
          stage_file_list "${dependency}" "${dest}"
        done
  done
}
160+
161+
# binary_to_libraries prints, one per line, the library files that ldd
# reports for the binary $1. It prints nothing for a static binary.
function binary_to_libraries() {
  local binary="$1"

  # see: https://man7.org/linux/man-pages/man1/ldd.1.html
  # Each output line looks like:
  #   linux-vdso.so.1 (0x00007fffb11c3000)
  # or
  #   libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f2f52d26000)
  #
  # This is a little funky because ldd treats static binaries as errors ("not
  # a dynamic executable") but static libraries as non-errors ("statically
  # linked"). We want real ldd errors, but static binaries are OK.
  if [[ "$(ldd "${binary}" 2>&1)" == *"not a dynamic executable"* ]]; then
    return
  fi

  # The pipeline stages, in order:
  #   1. skip static binaries ("statically linked")
  #   2. skip linux-vdso.so.1, a special virtual shared object from the kernel
  #      (see: http://man7.org/linux/man-pages/man7/vdso.7.html)
  #   3. strip the leading '${name} => ' if any, so only '/lib-foo.so (0xf00)'
  #      remains
  #   4. keep only the path, not the (0x${LOCATION})
  ldd "${binary}" \
    | grep_allow_nomatch -v "statically linked" \
    | grep_allow_nomatch -v 'linux-vdso.so.1' \
    | sed -E 's#.* => /#/#' \
    | awk '{print $1}'
}
188+
189+
# stage_binaries stages each named binary, plus every library that
# binary_to_libraries reports for it, into the staging dir.
#
# Args:
#   $1: path to staging dir
#   $2+: binaries, given either as a path or as a name to look up in PATH
# Returns:
#   non-zero if a binary cannot be found
function stage_binaries() {
  local staging="$1"
  shift

  local bin binary_path lib
  for bin in "$@"; do
    echo "staging binary ${bin}"

    # Locate the path to the binary. `type -P` is a bash builtin that does a
    # PATH search for an executable file, so this does not depend on the
    # external `which` tool being installed in the build image.
    if ! binary_path="$(type -P "${bin}")"; then
      echo "error: cannot find binary '${bin}'" >&2
      return 1
    fi

    # ensure package metadata dir
    mkdir -p "${staging}/var/lib/dpkg/status.d/"

    # stage the binary itself
    stage_file "${binary_path}" "${staging}"

    # stage the dependencies of the binary
    binary_to_libraries "${binary_path}" \
      | while read -r lib; do
          stage_file "${lib}" "${staging}"
        done
  done
}
214+
215+
# usage prints a one-line synopsis of the command-line flags to stdout.
function usage() {
  printf '%s -o <staging-dir> ( -p <package> | -b binary )...\n' "$0"
}
218+
219+
# main parses the command line and stages the requested packages and binaries.
#
# Flags:
#   -o <staging-dir>  directory to stage into (required)
#   -p <package>      package to stage (repeatable)
#   -b <binary>       binary to stage (repeatable)
#   -?, -h, --help    print usage and exit 0
#
# Exit codes:
#   2  a flag is missing its argument
#   3  unknown argument
#   4  no staging dir was given
function main() {
  local staging=""
  local pkgs=()
  local bins=()

  while (( $# > 0 )); do
    case "$1" in
      "-?" | "-h" | "--help")
        usage
        exit 0
        ;;
      "-b" | "-p" | "-o")
        # All three flags take exactly one non-empty argument.
        if [[ -z "${2:-}" ]]; then
          echo "error: flag '$1' requires an argument" >&2
          usage >&2
          exit 2
        fi
        case "$1" in
          "-b") bins+=("$2") ;;
          "-p") pkgs+=("$2") ;;
          "-o") staging="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "error: unknown argument: $1" >&2
        usage >&2
        exit 3
        ;;
    esac
  done

  if [[ -z "${staging}" ]]; then
    # Say what is wrong, like every other failure path does.
    echo "error: flag '-o' is required" >&2
    usage >&2
    exit 4
  fi

  # The arrays are only expanded when non-empty, which also keeps
  # `set -o nounset` happy on bash versions that reject empty arrays.
  if (( ${#pkgs[@]} > 0 )); then
    stage_packages "${staging}" "${pkgs[@]}"
  fi
  if (( ${#bins[@]} > 0 )); then
    stage_binaries "${staging}" "${bins[@]}"
  fi
}
277+
278+
# Script entry point: pass every command-line argument through to main.
main "$@"

0 commit comments

Comments
 (0)