@@ -48,53 +48,128 @@ trap 'errexit' ERR
4848# expansions and subshells
4949set -o errtrace
5050
# Root-level symlinks (e.g. /lib -> /usr/lib on newer debian) and their
# resolved targets, kept as parallel arrays: ROOT_REV_LINKS[i] is the realpath
# of ROOT_FWD_LINKS[i]. Tracked globally so the scan of / is only done once.
ROOT_FWD_LINKS=()
ROOT_REV_LINKS=()

# load_root_links records every symlink that lives directly in / together
# with the path it resolves to.
#
# Globals:   ROOT_FWD_LINKS, ROOT_REV_LINKS (reset, then written)
# Arguments: $1 - staging dir; accepted for call compatibility, not used
function load_root_links() {
  local entry
  local target

  # Start from scratch so a repeated call cannot append duplicate pairs.
  ROOT_FWD_LINKS=()
  ROOT_REV_LINKS=()

  # Glob instead of parsing `ls` output; like `ls /`, this skips dotfiles.
  for entry in /*; do
    [[ -L "${entry}" ]] || continue
    # Skip links that cannot be resolved so the two arrays stay index-aligned.
    target="$(realpath "${entry}")" || continue
    ROOT_FWD_LINKS+=("${entry}")
    ROOT_REV_LINKS+=("${target}")
  done
}
65+
# file_to_package identifies the debian package that provided the file $1
#
# Globals:   ROOT_FWD_LINKS, ROOT_REV_LINKS (read)
# Arguments: $1 - path of the file to look up
# Outputs:   the text before the first ':' of dpkg-query's answer on stdout
# Returns:   1 (after re-running the query so its error is shown) when dpkg
#            knows the file under neither spelling
function file_to_package() {
  local file="$1"

  # Newer versions of debian symlink /lib -> /usr/lib (and others), but dpkg
  # has some files in its DB as "/lib/<whatever>" and others as
  # "/usr/lib/<whatever>". This causes havoc trying to identify the package
  # for a library discovered via ldd.
  #
  # So, to combat this we build a "map" of root links, and their targets, and
  # try to search for both paths.
  local alt=""
  local fwd
  local rev
  local i
  for (( i = 0; i < ${#ROOT_FWD_LINKS[@]}; i++ )); do
    fwd="${ROOT_FWD_LINKS[i]}"
    rev="${ROOT_REV_LINKS[i]}"
    # Literal prefix match and swap; no regex, so characters such as '.' in a
    # path cannot be misread as pattern syntax.
    if [[ "${file}" == "${fwd}/"* ]]; then
      alt="${rev}${file#"${fwd}"}"
      break
    elif [[ "${file}" == "${rev}/"* ]]; then
      alt="${fwd}${file#"${rev}"}"
      break
    fi
  done

  # Run the lookups as `if` conditions: a miss on the first spelling is
  # expected and must not trip errexit / the ERR trap before the alternate
  # spelling has been tried.
  local out=""
  local result=""
  if out="$(dpkg-query --search "${file}" 2>&1)"; then
    result="${out}"
  elif [[ -n "${alt}" ]] && out="$(dpkg-query --search "${alt}" 2>&1)"; then
    result="${out}"
  fi

  # If we found no match, let it error out.
  if [[ -z "${result}" ]]; then
    dpkg-query --search "${file}"
    return 1
  fi

  # `dpkg-query --search $file-pattern` outputs lines with the format: "$package: $file-path"
  # where $file-path belongs to $package.
  printf '%s\n' "${result}" | cut -d':' -f1
}
60115
61- # package_to_copyright gives the path to the copyright file for the package $1
62- function package_to_copyright() {
63- local pkg=" $1 "
64- echo " /usr/share/doc/${pkg} /copyright"
# ensure_dir_in_staging makes sure directory $2 exists below staging dir $1.
# A missing directory is copied from the host through tar, without its
# contents (--no-recursion) and as a real directory even if the host path is
# a symlink (--dereference).
#
# Arguments: $1 - staging dir
#            $2 - absolute directory path on the host
function ensure_dir_in_staging() {
  local staging="$1"
  local dir="$2"

  if [[ -e "${staging}/${dir}" ]]; then
    return 0
  fi

  # tar wants a member name relative to "-C /".
  local rel="${dir#/}"
  if [[ -z "${rel}" ]]; then
    # "/" corresponds to the staging root itself; there is no member to copy.
    mkdir -p "${staging}"
    return 0
  fi

  tar -C / -c --no-recursion --dereference "${rel}" | tar -C "${staging}" -x
}
66125
# stage_file stages the filepath $2 into the staging dir $1, following
# symlinks and staging copyrights
#
# Arguments: $1 - staging dir
#            $2 - absolute path of the file to stage
# Side effects: besides the file itself this writes the owning package's
#   gzipped copyright to <staging>/copyright/<pkg>/copyright.gz and its dpkg
#   status to <staging>/var/lib/dpkg/status.d/<pkg>.
function stage_file() {
  local staging="$1"
  local file="$2"

  # short circuit if we have done this file before
  if [[ -e "${staging}/${file}" ]]; then
    return
  fi

  # copy the real form of the named path
  # (declaration and assignment are split so a realpath failure is not masked
  # by the exit status of `local`)
  local real
  real="$(realpath "${file}")"
  cp -a --parents "${real}" "${staging}"

  # recreate symlinks, even on intermediate path elements
  if [[ "${file}" != "${real}" ]]; then
    if [[ ! -e "${staging}/${file}" ]]; then
      local dir
      dir="$(dirname "${file}")"
      ensure_dir_in_staging "${staging}" "${dir}"
      ln -s "${real}" "${staging}/${file}"
    fi
  elif [[ -x "${file}" ]]; then
    # stage the dependencies of the binary
    local lib
    binary_to_libraries "${file}" \
      | while read -r lib; do
        stage_file "${staging}" "${lib}"
      done
  fi

  # get the package so we can stage package metadata as well
  local package
  package="$(file_to_package "${file}")"

  # stage the copyright for the file, if it exists
  local copyright_src="/usr/share/doc/${package}/copyright"
  local copyright_dst="${staging}/copyright/${package}/copyright.gz"
  if [[ -f "${copyright_src}" && ! -f "${copyright_dst}" ]]; then
    mkdir -p "$(dirname "${copyright_dst}")"
    gzip -9 --to-stdout "${copyright_src}" > "${copyright_dst}"
  fi

  # stage the package status mimicking bazel
  # https://github.com/bazelbuild/rules_docker/commit/f5432b813e0a11491cf2bf83ff1a923706b36420
  # instead of parsing the control file, we can just get the actual package status with dpkg
  mkdir -p "${staging}/var/lib/dpkg/status.d/"
  dpkg -s "${package}" > "${staging}/var/lib/dpkg/status.d/${package}"
}
99174
100175function grep_allow_nomatch() {
@@ -115,23 +190,37 @@ function indent() {
115190 { " $@ " 2>&1 1>&3 | _indent; } 3>&1 1>&2 | _indent
116191}
117192
118- function stage_file_list() {
119- local pkg=" $1 "
120- local staging=" $2 "
121-
122- dpkg -L " ${pkg} " \
123- | grep_allow_nomatch -vE ' (/\.|/usr/share/(man|doc|.*-completion))' \
124- | while read -r file; do
125- if [[ -f " $file " ]]; then
126- stage_file " ${file} " " ${staging} "
127- if [[ -L " $file " ]]; then
128- continue
129- fi
130- if [[ -x " $file " ]]; then
131- stage_binaries " ${staging} " " ${file} "
132- fi
# stage_one_package stages every regular file that package $2 installs into
# the staging dir $1. Files inside the package whose content (md5) is
# identical to an earlier file are not copied again; a symlink to the first
# copy is created in their place.
#
# Arguments: $1 - staging dir
#            $2 - installed debian package name
function stage_one_package() {
  local staging="$1"
  local pkg="$2"

  # Parallel arrays: sums[i] is the md5 of the already-staged file names[i].
  local names=()
  local sums=()
  local file
  local sum
  local dup
  local i
  while IFS= read -r file; do
    [[ -f "${file}" ]] || continue

    # Symlinks are handed straight to stage_file. They are never checksummed,
    # so they must not be recorded as a dedup candidate either.
    if [[ -L "${file}" ]]; then
      stage_file "${staging}" "${file}"
      continue
    fi

    sum="$(md5sum "${file}" | cut -f1 -d' ')"
    dup=""
    # Index loop rather than "${sums[@]}" so an empty array is safe even
    # when nounset is in effect on older bash.
    for (( i = 0; i < ${#sums[@]}; i++ )); do
      if [[ "${sums[i]}" == "${sum}" ]]; then
        dup="${names[i]}"
        break
      fi
    done

    if [[ -n "${dup}" ]]; then
      # The path may already be staged (e.g. as a library of some binary);
      # `ln -s` would fail on it, so only link when nothing is there yet.
      if [[ ! -e "${staging}/${file}" && ! -L "${staging}/${file}" ]]; then
        ensure_dir_in_staging "${staging}" "$(dirname "${file}")"
        ln -s "${dup}" "${staging}/${file}"
      fi
    else
      names+=("${file}")
      sums+=("${sum}")
      stage_file "${staging}" "${file}"
    fi
  done < <(dpkg -L "${pkg}" \
    | grep_allow_nomatch -vE '(/\.|/usr/share/(man|doc|.*-completion))')
}
136225
137226function get_dependent_packages() {
@@ -148,18 +237,20 @@ function stage_packages() {
148237 local staging=" $1 "
149238 shift
150239
151- mkdir -p " ${staging} " /var/lib/dpkg/status.d/
152240 indent apt-get -y -qq -o Dpkg::Use-Pty=0 update
153241
154242 local pkg
155243 for pkg; do
156244 echo " staging package ${pkg} "
245+ local du_before=" $( du -sk " ${staging} " | cut -f1) "
157246 indent apt-get -y -qq -o Dpkg::Use-Pty=0 --no-install-recommends install " ${pkg} "
158- stage_file_list " ${pkg} " " $staging "
247+ stage_one_package " $staging " " ${pkg} "
159248 get_dependent_packages " ${pkg} " \
160249 | while read -r dep; do
161- stage_file_list " ${dep } " " ${staging } "
250+ stage_one_package " ${staging } " " ${dep } "
162251 done
252+ local du_after=" $( du -sk " ${staging} " | cut -f1) "
253+ echo " package ${pkg} size: +$(( $du_after - $du_before )) kB (of ${du_after} kB)"
163254 done
164255}
165256
@@ -191,29 +282,29 @@ function binary_to_libraries() {
191282 | awk ' {print $1}'
192283}
193284
# stage_one_binary resolves the binary $2 on PATH and stages it into the
# staging dir $1 via stage_file.
#
# Arguments: $1 - staging dir
#            $2 - binary name (looked up on PATH) or path
# Returns:   1 if the binary cannot be found
function stage_one_binary() {
  local staging="$1"
  local bin="$2"

  # locate the path to the binary
  # `type -P` is the builtin PATH-only lookup: unlike external `which` it
  # needs no extra package, and unlike `command -v` it never answers with a
  # builtin or function name.
  local binary_path
  if ! binary_path="$(type -P "${bin}")"; then
    echo "ERROR: binary ${bin} not found in PATH" >&2
    return 1
  fi

  # stage the binary itself
  stage_file "${staging}" "${binary_path}"
}
296+
# stage_binaries stages each named binary into the staging dir $1 and reports
# how much the staging dir grew for every one of them.
#
# Arguments: $1  - staging dir
#            $2… - binaries to stage
# Outputs:   progress and size lines on stdout
function stage_binaries() {
  local staging="$1"
  shift

  # `du` below needs the directory to exist even before anything is staged.
  mkdir -p "${staging}"

  local bin
  local du_before
  local du_after
  for bin; do
    echo "staging binary ${bin}"
    # Assignments are kept apart from `local` so a failing du is not masked.
    du_before="$(du -sk "${staging}" | cut -f1)"
    stage_one_binary "${staging}" "${bin}"
    du_after="$(du -sk "${staging}" | cut -f1)"
    echo "binary ${bin} size: +$(( du_after - du_before )) kB (of ${du_after} kB)"
  done
}
219310
@@ -272,12 +363,21 @@ function main() {
272363 exit 4
273364 fi
274365
366+ # Newer versions of debian symlink /bin -> /usr/bin (and lib, and others).
367+ # The somewhat naive copying done in this program does not retain that,
368+ # which causes some files to be duplicated. Fortunately, these are all in
369+ # the root dir, or we might have to do something more complicated.
370+ load_root_links " ${staging} "
371+
275372 if (( "${# pkgs[@]} " > 0 )) ; then
276373 stage_packages " ${staging} " " ${pkgs[@]} "
277374 fi
278375 if (( "${# bins[@]} " > 0 )) ; then
279376 stage_binaries " ${staging} " " ${bins[@]} "
280377 fi
378+
379+ echo " final staged size: $( du -sk " ${staging} " | cut -f1) kB"
380+ du -xk --max-depth=3 " ${staging} " | sort -n | _indent
281381}
282382
283383main " $@ "
0 commit comments