3 from __future__ import print_function
6 This script parses each "meta" file and extracts the
7 information needed to deduce build and src dependencies.
9 It works much the same as the original shell script, but is
10 *much* more efficient.
12 The parsing work is handled by the class MetaFile.
13 We only pay attention to a subset of the information in the
14 "meta" files. Specifically:
16 'CWD' to initialize our notion.
18 'C' to track chdir(2) on a per process basis
20 'R' files read are what we really care about.
21 directories read, provide a clue to resolving
22 subsequent relative paths. That is if we cannot find
23 them relative to 'cwd', we check relative to the last
26 'W' files opened for write or read-write,
27 for filemon V3 and earlier.
33 'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
40 $Id: meta2deps.py,v 1.34 2020/10/02 03:11:17 sjg Exp $
42 Copyright (c) 2011-2020, Simon J. Gerraty
43 Copyright (c) 2011-2017, Juniper Networks, Inc.
46 Redistribution and use in source and binary forms, with or without
47 modification, are permitted provided that the following conditions
49 1. Redistributions of source code must retain the above copyright
50 notice, this list of conditions and the following disclaimer.
51 2. Redistributions in binary form must reproduce the above copyright
52 notice, this list of conditions and the following disclaimer in the
53 documentation and/or other materials provided with the distribution.
55 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
71 def getv(dict, key, d=None):
72 """Lookup key in dict and return value or the supplied default."""
77 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
79 Return an absolute path, resolving via cwd or last_dir if needed.
81 if path.endswith('/.'):
83 if len(path) > 0 and path[0] == '/':
84 if os.path.exists(path):
87 print("skipping non-existent:", path, file=debug_out)
91 if path.startswith('./'):
95 for d in [last_dir, cwd]:
100 p = '/'.join(dw[:-1])
104 p = '/'.join([d,path])
106 print("looking for:", p, end=' ', file=debug_out)
107 if not os.path.exists(p):
109 print("nope", file=debug_out)
113 print("found:", p, file=debug_out)
118 """cleanup path without using realpath(3)"""
119 if path.startswith('/'):
126 if not d or d == '.':
136 return r + '/'.join(p)
138 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
140 Return an absolute path, resolving via cwd or last_dir if needed.
141 this gets called a lot, so we try to avoid calling realpath.
143 rpath = resolve(path, cwd, last_dir, debug, debug_out)
146 elif len(path) > 0 and path[0] == '/':
148 if (path.find('/') < 0 or
149 path.find('./') > 0 or
150 path.endswith('/..')):
151 path = cleanpath(path)
154 def sort_unique(list, cmp=None, key=None, reverse=False):
155 list.sort(cmp, key, reverse)
166 return ['/' + x + '/',
172 """class to parse meta files generated by bmake."""
185 def __init__(self, name, conf={}):
186 """if name is set we will parse it now.
187 conf can have the following keys:
189 SRCTOPS list of tops of the src tree(s).
191 CURDIR the src directory 'bmake' was run from.
193 RELDIR the relative path from SRCTOP to CURDIR
195 MACHINE the machine we built for.
196 set to 'none' if we are not cross-building.
197 More specifically if machine cannot be deduced from objdirs.
200 Sometimes MACHINE isn't enough.
203 when we build for the pseudo machine 'host'
204 the object tree uses HOST_TARGET rather than MACHINE.
206 OBJROOTS a list of the common prefix for all obj dirs it might
209 DPDEPS names an optional file to which per file dependencies
211 For example if 'some/path/foo.h' is read from SRCTOP
212 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
213 This can allow 'bmake' to learn all the dirs within
214 the tree that depend on 'foo.h'
217 A list of paths to ignore.
218 ccache(1) can otherwise be trouble.
220 debug desired debug level
222 debug_out open file to send debug output to (sys.stderr)
227 self.debug = getv(conf, 'debug', 0)
228 self.debug_out = getv(conf, 'debug_out', sys.stderr)
230 self.machine = getv(conf, 'MACHINE', '')
231 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
232 self.target_spec = getv(conf, 'TARGET_SPEC', '')
233 self.curdir = getv(conf, 'CURDIR')
234 self.reldir = getv(conf, 'RELDIR')
235 self.dpdeps = getv(conf, 'DPDEPS')
239 # some of the steps below we want to do only once
241 self.host_target = getv(conf, 'HOST_TARGET')
242 for srctop in getv(conf, 'SRCTOPS', []):
243 if srctop[-1] != '/':
245 if not srctop in self.srctops:
246 self.srctops.append(srctop)
247 _srctop = os.path.realpath(srctop)
248 if _srctop[-1] != '/':
250 if not _srctop in self.srctops:
251 self.srctops.append(_srctop)
253 trim_list = add_trims(self.machine)
254 if self.machine == 'host':
255 trim_list += add_trims(self.host_target)
257 trim_list += add_trims(self.target_spec)
259 for objroot in getv(conf, 'OBJROOTS', []):
261 if objroot.endswith(e):
262 # this is not what we want - fix it
263 objroot = objroot[0:-len(e)]
265 if objroot[-1] != '/':
267 if not objroot in self.objroots:
268 self.objroots.append(objroot)
269 _objroot = os.path.realpath(objroot)
270 if objroot[-1] == '/':
272 if not _objroot in self.objroots:
273 self.objroots.append(_objroot)
275 # we want the longest match
276 self.srctops.sort(reverse=True)
277 self.objroots.sort(reverse=True)
279 self.excludes = getv(conf, 'EXCLUDES', [])
282 print("host_target=", self.host_target, file=self.debug_out)
283 print("srctops=", self.srctops, file=self.debug_out)
284 print("objroots=", self.objroots, file=self.debug_out)
285 print("excludes=", self.excludes, file=self.debug_out)
287 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
289 if self.dpdeps and not self.reldir:
291 print("need reldir:", end=' ', file=self.debug_out)
293 srctop = self.find_top(self.curdir, self.srctops)
295 self.reldir = self.curdir.replace(srctop,'')
297 print(self.reldir, file=self.debug_out)
299 self.dpdeps = None # we cannot do it?
301 self.cwd = os.getcwd() # make sure this is initialized
302 self.last_dir = self.cwd
308 """reset state if we are being passed meta files from multiple directories."""
314 def dirdeps(self, sep='\n'):
316 return sep.strip() + sep.join(self.obj_deps)
318 def src_dirdeps(self, sep='\n'):
319 """return SRC_DIRDEPS"""
320 return sep.strip() + sep.join(self.src_deps)
322 def file_depends(self, out=None):
323 """Append DPDEPS_${file} += ${RELDIR}
324 for each file we saw, to the output file."""
327 for f in sort_unique(self.file_deps):
328 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
329 # these entries provide for reverse DIRDEPS lookup
330 for f in self.obj_deps:
331 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
333 def seenit(self, dir):
334 """remember that we have seen dir."""
337 def add(self, list, data, clue=''):
338 """add data to list if it isn't already there."""
342 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
344 def find_top(self, path, list):
345 """the logical tree may be split across multiple trees"""
347 if path.startswith(top):
349 print("found in", top, file=self.debug_out)
353 def find_obj(self, objroot, dir, path, input):
354 """return path within objroot, taking care of .dirdep files"""
356 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
357 if not ddep and os.path.exists(ddepf):
358 ddep = open(ddepf, 'r').readline().strip('# \n')
360 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
361 if ddep.endswith(self.machine):
362 ddep = ddep[0:-(1+len(self.machine))]
363 elif self.target_spec and ddep.endswith(self.target_spec):
364 ddep = ddep[0:-(1+len(self.target_spec))]
367 # no .dirdeps, so remember that we've seen the raw input
370 if self.machine == 'none':
371 if dir.startswith(objroot):
372 return dir.replace(objroot,'')
374 m = self.dirdep_re.match(dir.replace(objroot,''))
377 dmachine = m.group(1)
378 if dmachine != self.machine:
379 if not (self.machine == 'host' and
380 dmachine == self.host_target):
382 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
383 ddep += '.' + dmachine
387 def try_parse(self, name=None, file=None):
388 """give file and line number causing exception"""
390 self.parse(name, file)
393 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
396 def parse(self, name=None, file=None):
397 """A meta file looks like:
399 # Meta data file "path"
404 -- filemon acquired metadata --
414 L "pid" "src" "target"
419 We go to some effort to avoid processing a dependency more than once.
420 Of the above record types only C,E,F,L,R,V and W are of interest.
423 version = 0 # unknown
428 cwd = self.last_dir = self.cwd
430 f = open(self.name, 'r')
438 self.seenit(self.curdir) # we ignore this
440 interesting = 'CEFLRV'
443 # ignore anything we don't care about
444 if not line[0] in interesting:
447 print("input:", line, end=' ', file=self.debug_out)
456 # we cannot ignore 'W' records
457 # as they may be 'rw'
461 self.cwd = cwd = self.last_dir = w[1]
462 self.seenit(cwd) # ignore this
464 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
470 pid_last_dir[last_pid] = self.last_dir
471 cwd = getv(pid_cwd, pid, self.cwd)
472 self.last_dir = getv(pid_last_dir, pid, self.cwd)
479 pid_last_dir[npid] = cwd
483 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
487 print("missing cwd=", cwd, file=self.debug_out)
488 if cwd.endswith('/.'):
490 self.last_dir = pid_last_dir[pid] = cwd
493 print("cwd=", cwd, file=self.debug_out)
496 if w[2] in self.seen:
498 print("seen:", w[2], file=self.debug_out)
502 # these are special, treat src as read and
504 self.parse_path(w[2].strip("'"), cwd, 'R', w)
505 self.parse_path(w[3].strip("'"), cwd, 'W', w)
511 self.parse_path(path, cwd, w[0], w)
517 def is_src(self, base, dir, rdir):
518 """is base in srctop"""
519 for dir in [dir,rdir]:
522 path = '/'.join([dir,base])
523 srctop = self.find_top(path, self.srctops)
526 self.add(self.file_deps, path.replace(srctop,''), 'file')
527 self.add(self.src_deps, dir.replace(srctop,''), 'src')
532 def parse_path(self, path, cwd, op=None, w=[]):
533 """look at a path for the op specified"""
538 # we are never interested in .dirdep files as dependencies
539 if path.endswith('.dirdep'):
541 for p in self.excludes:
542 if p and path.startswith(p):
544 print("exclude:", p, path, file=self.debug_out)
546 # we don't want to resolve the last component if it is
548 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
551 dir,base = os.path.split(path)
554 print("seen:", dir, file=self.debug_out)
556 # we can have a path in an objdir which is a link
557 # to the src dir, we may need to add dependencies for each
559 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
560 rdir = os.path.realpath(dir)
563 # now put path back together
564 path = '/'.join([dir,base])
566 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
568 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
570 print("skipping:", path, file=self.debug_out)
572 if os.path.isdir(path):
574 self.last_dir = path;
576 print("ldir=", self.last_dir, file=self.debug_out)
580 # finally, we get down to it
581 if dir == self.cwd or dir == self.curdir:
583 if self.is_src(base, dir, rdir):
589 for dir in [dir,rdir]:
592 objroot = self.find_top(dir, self.objroots)
596 ddep = self.find_obj(objroot, dir, path, w[2])
598 self.add(self.obj_deps, ddep, 'obj')
599 if self.dpdeps and objroot.endswith('/stage/'):
600 sp = '/'.join(path.replace(objroot,'').split('/')[1:])
601 self.add(self.file_deps, sp, 'file')
603 # don't waste time looking again
608 def main(argv, klass=MetaFile, xopts='', xoptf=None):
609 """Simple driver for class MetaFile.
612 script [options] [key=value ...] "meta" ...
614 Options and key=value pairs contribute to the
615 dictionary passed to MetaFile.
618 add "SRCTOP" to the "SRCTOPS" list.
623 add "OBJROOT" to the "OBJROOTS" list.
638 # import Psyco if we can
639 # it can speed things up quite a bit
655 machine = os.environ['MACHINE']
657 conf['MACHINE'] = machine
658 machine_arch = os.environ['MACHINE_ARCH']
660 conf['MACHINE_ARCH'] = machine_arch
661 srctop = os.environ['SB_SRC']
663 conf['SRCTOPS'].append(srctop)
664 objroot = os.environ['SB_OBJROOT']
666 conf['OBJROOTS'].append(objroot)
673 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
676 conf['MACHINE_ARCH'] = a
682 conf['HOST_TARGET'] = a
684 if a not in conf['SRCTOPS']:
685 conf['SRCTOPS'].append(a)
689 if a not in conf['OBJROOTS']:
690 conf['OBJROOTS'].append(a)
698 conf['TARGET_SPEC'] = a
700 if a not in conf['EXCLUDES']:
701 conf['EXCLUDES'].append(a)
705 conf['debug'] = debug
707 # get any var=val assignments
712 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
728 debug_out = getv(conf, 'debug_out', sys.stderr)
731 print("config:", file=debug_out)
732 print("psyco=", have_psyco, file=debug_out)
733 for k,v in list(conf.items()):
734 print("%s=%s" % (k,v), file=debug_out)
738 if a.endswith('.meta'):
739 if not os.path.exists(a):
742 elif a.startswith('@'):
743 # there can actually be multiple files per line
744 for line in open(a[1:]):
745 for f in line.strip().split():
746 if not os.path.exists(f):
753 print(m.src_dirdeps('\nsrc:'))
755 dpdeps = getv(conf, 'DPDEPS')
757 m.file_depends(open(dpdeps, 'wb'))
761 if __name__ == '__main__':
765 # yes, this goes to stdout
766 print("ERROR: ", sys.exc_info()[1])