4 This script parses each "meta" file and extracts the
5 information needed to deduce build and src dependencies.
7 It works much the same as the original shell script, but is
10 The parsing work is handled by the class MetaFile.
11 We only pay attention to a subset of the information in the
12 "meta" files. Specifically:
14 'CWD' to initialize our notion.
16 'C' to track chdir(2) on a per process basis
18 'R' files read are what we really care about.
19 directories read, provide a clue to resolving
20 subsequent relative paths. That is if we cannot find
21 them relative to 'cwd', we check relative to the last
24 'W' files opened for write or read-write,
25 for filemon V3 and earlier.
31 'V' the filemon version, this record is used as a clue
32 that we have reached the interesting bit.
38 $Id: meta2deps.py,v 1.15 2013/07/29 20:41:23 sjg Exp $
40 Copyright (c) 2011-2013, Juniper Networks, Inc.
43 Redistribution and use in source and binary forms, with or without
44 modification, are permitted provided that the following conditions
46 1. Redistributions of source code must retain the above copyright
47 notice, this list of conditions and the following disclaimer.
48 2. Redistributions in binary form must reproduce the above copyright
49 notice, this list of conditions and the following disclaimer in the
50 documentation and/or other materials provided with the distribution.
52 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
53 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
54 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
55 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
56 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
59 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
60 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
61 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
62 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68 def getv(dict, key, d=None):
69 """Lookup key in dict and return value or the supplied default."""
74 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
76 Return an absolute path, resolving via cwd or last_dir if needed.
78 if path.endswith('/.'):
80 if len(path) > 0 and path[0] == '/':
84 if path.startswith('./'):
88 for d in [last_dir, cwd]:
91 p = '/'.join([d,path])
93 print >> debug_out, "looking for:", p,
94 if not os.path.exists(p):
96 print >> debug_out, "nope"
100 print >> debug_out, "found:", p
104 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
106 Return an absolute path, resolving via cwd or last_dir if needed.
107 this gets called a lot, so we try to avoid calling realpath
108 until we know we have something.
110 rpath = resolve(path, cwd, last_dir, debug, debug_out)
113 if (path.find('./') > 0 or
114 path.endswith('/..') or
115 os.path.islink(path)):
116 return os.path.realpath(path)
119 def sort_unique(list, cmp=None, key=None, reverse=False):
120 list.sort(cmp, key, reverse)
130 return ['/' + x + '/',
136 """class to parse meta files generated by bmake."""
149 def __init__(self, name, conf={}):
150 """if name is set we will parse it now.
151 conf can have the following keys:
153 SRCTOPS list of tops of the src tree(s).
155 CURDIR the src directory 'bmake' was run from.
157 RELDIR the relative path from SRCTOP to CURDIR
159 MACHINE the machine we built for.
160 set to 'none' if we are not cross-building.
161 More specifically if machine cannot be deduced from objdirs.
164 Sometimes MACHINE isn't enough.
167 when we build for the pseudo machine 'host'
168 the object tree uses HOST_TARGET rather than MACHINE.
170 OBJROOTS a list of the common prefix for all obj dirs it might
173 DPDEPS names an optional file to which per file dependencies
175 For example if 'some/path/foo.h' is read from SRCTOP
176 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
177 This can allow 'bmake' to learn all the dirs within
178 the tree that depend on 'foo.h'
180 debug desired debug level
182 debug_out open file to send debug output to (sys.stderr)
187 self.debug = getv(conf, 'debug', 0)
188 self.debug_out = getv(conf, 'debug_out', sys.stderr)
190 self.machine = getv(conf, 'MACHINE', '')
191 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
192 self.target_spec = getv(conf, 'TARGET_SPEC', '')
193 self.curdir = getv(conf, 'CURDIR')
194 self.reldir = getv(conf, 'RELDIR')
195 self.dpdeps = getv(conf, 'DPDEPS')
199 # some of the steps below we want to do only once
201 self.host_target = getv(conf, 'HOST_TARGET')
202 for srctop in getv(conf, 'SRCTOPS', []):
203 if srctop[-1] != '/':
205 if not srctop in self.srctops:
206 self.srctops.append(srctop)
207 _srctop = os.path.realpath(srctop)
208 if _srctop[-1] != '/':
210 if not _srctop in self.srctops:
211 self.srctops.append(_srctop)
213 trim_list = add_trims(self.machine)
214 if self.machine == 'host':
215 trim_list += add_trims(self.host_target)
217 trim_list += add_trims(self.target_spec)
219 for objroot in getv(conf, 'OBJROOTS', []):
221 if objroot.endswith(e):
222 # this is not what we want - fix it
223 objroot = objroot[0:-len(e)]
226 if not objroot in self.objroots:
227 self.objroots.append(objroot)
228 _objroot = os.path.realpath(objroot)
229 if objroot[-1] == '/':
231 if not _objroot in self.objroots:
232 self.objroots.append(_objroot)
234 # we want the longest match
235 self.srctops.sort(reverse=True)
236 self.objroots.sort(reverse=True)
239 print >> self.debug_out, "host_target=", self.host_target
240 print >> self.debug_out, "srctops=", self.srctops
241 print >> self.debug_out, "objroots=", self.objroots
243 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
245 if self.dpdeps and not self.reldir:
247 print >> self.debug_out, "need reldir:",
249 srctop = self.find_top(self.curdir, self.srctops)
251 self.reldir = self.curdir.replace(srctop,'')
253 print >> self.debug_out, self.reldir
255 self.dpdeps = None # we cannot do it?
257 self.cwd = os.getcwd() # make sure this is initialized
263 """reset state if we are being passed meta files from multiple directories."""
269 def dirdeps(self, sep='\n'):
271 return sep.strip() + sep.join(self.obj_deps)
273 def src_dirdeps(self, sep='\n'):
274 """return SRC_DIRDEPS"""
275 return sep.strip() + sep.join(self.src_deps)
277 def file_depends(self, out=None):
278 """Append DPDEPS_${file} += ${RELDIR}
279 for each file we saw, to the output file."""
282 for f in sort_unique(self.file_deps):
283 print >> out, 'DPDEPS_%s += %s' % (f, self.reldir)
285 def seenit(self, dir):
286 """remember that we have seen dir."""
289 def add(self, list, data, clue=''):
290 """add data to list if it isn't already there."""
294 print >> self.debug_out, "%s: %sAdd: %s" % (self.name, clue, data)
296 def find_top(self, path, list):
297 """the logical tree may be split across multiple trees"""
299 if path.startswith(top):
301 print >> self.debug_out, "found in", top
305 def find_obj(self, objroot, dir, path, input):
306 """return path within objroot, taking care of .dirdep files"""
308 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
309 if not ddep and os.path.exists(ddepf):
310 ddep = open(ddepf, 'rb').readline().strip('# \n')
312 print >> self.debug_out, "found %s: %s\n" % (ddepf, ddep)
313 if ddep.endswith(self.machine):
314 ddep = ddep[0:-(1+len(self.machine))]
315 elif self.target_spec and ddep.endswith(self.target_spec):
316 ddep = ddep[0:-(1+len(self.target_spec))]
319 # no .dirdeps, so remember that we've seen the raw input
322 if self.machine == 'none':
323 if dir.startswith(objroot):
324 return dir.replace(objroot,'')
326 m = self.dirdep_re.match(dir.replace(objroot,''))
329 dmachine = m.group(1)
330 if dmachine != self.machine:
331 if not (self.machine == 'host' and
332 dmachine == self.host_target):
334 print >> self.debug_out, "adding .%s to %s" % (dmachine, ddep)
335 ddep += '.' + dmachine
339 def try_parse(self, name=None, file=None):
340 """give file and line number causing exception"""
342 self.parse(name, file)
345 print >> sys.stderr, '{}:{}: '.format(self.name, self.line),
348 def parse(self, name=None, file=None):
349 """A meta file looks like:
351 # Meta data file "path"
356 -- filemon acquired metadata --
366 L "pid" "src" "target"
371 We go to some effort to avoid processing a dependency more than once.
372 Of the above record types only C,E,F,L,R,V and W are of interest.
375 version = 0 # unknown
380 cwd = last_dir = self.cwd
382 f = open(self.name, 'rb')
390 self.seenit(self.curdir) # we ignore this
392 interesting = 'CEFLRV'
395 # ignore anything we don't care about
396 if not line[0] in interesting:
399 print >> self.debug_out, "input:", line,
408 # we cannot ignore 'W' records
409 # as they may be 'rw'
413 self.cwd = cwd = last_dir = w[1]
414 self.seenit(cwd) # ignore this
416 print >> self.debug_out, "%s: CWD=%s" % (self.name, cwd)
422 pid_cwd[last_pid] = cwd
423 pid_last_dir[last_pid] = last_dir
424 cwd = getv(pid_cwd, pid, self.cwd)
425 last_dir = getv(pid_last_dir, pid, self.cwd)
432 pid_last_dir[npid] = cwd
436 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
437 if cwd.endswith('/.'):
441 print >> self.debug_out, "cwd=", cwd
444 if w[2] in self.seen:
446 print >> self.debug_out, "seen:", w[2]
450 path = w[2].strip("'")
453 # we are never interested in .dirdep files as dependencies
454 if path.endswith('.dirdep'):
456 # we don't want to resolve the last component if it is
458 path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
461 dir,base = os.path.split(path)
464 print >> self.debug_out, "seen:", dir
466 # we can have a path in an objdir which is a link
467 # to the src dir, we may need to add dependencies for each
469 dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
470 if rdir == dir or rdir.find('./') > 0:
472 # now put path back together
473 path = '/'.join([dir,base])
475 print >> self.debug_out, "raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path)
477 if w[0] == 'W' and path.endswith('.dirdep'):
479 if path in [last_dir, cwd, self.cwd, self.curdir]:
481 print >> self.debug_out, "skipping:", path
483 if os.path.isdir(path):
487 print >> self.debug_out, "ldir=", last_dir
491 # finally, we get down to it
492 if dir == self.cwd or dir == self.curdir:
494 srctop = self.find_top(path, self.srctops)
497 self.add(self.file_deps, path.replace(srctop,''), 'file')
498 self.add(self.src_deps, dir.replace(srctop,''), 'src')
501 if rdir and not rdir.startswith(srctop):
502 dir = rdir # for below
508 for dir in [dir,rdir]:
511 objroot = self.find_top(dir, self.objroots)
515 ddep = self.find_obj(objroot, dir, path, w[2])
517 self.add(self.obj_deps, ddep, 'obj')
519 # don't waste time looking again
526 def main(argv, klass=MetaFile, xopts='', xoptf=None):
527 """Simple driver for class MetaFile.
530 script [options] [key=value ...] "meta" ...
532 Options and key=value pairs contribute to the
533 dictionary passed to MetaFile.
536 add "SRCTOP" to the "SRCTOPS" list.
541 add "OBJROOT" to the "OBJROOTS" list.
556 # import Psyco if we can
557 # it can speed things up quite a bit
572 machine = os.environ['MACHINE']
574 conf['MACHINE'] = machine
575 machine_arch = os.environ['MACHINE_ARCH']
577 conf['MACHINE_ARCH'] = machine_arch
578 srctop = os.environ['SB_SRC']
580 conf['SRCTOPS'].append(srctop)
581 objroot = os.environ['SB_OBJROOT']
583 conf['OBJROOTS'].append(objroot)
590 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:' + xopts)
593 conf['MACHINE_ARCH'] = a
599 conf['HOST_TARGET'] = a
601 if a not in conf['SRCTOPS']:
602 conf['SRCTOPS'].append(a)
606 if a not in conf['OBJROOTS']:
607 conf['OBJROOTS'].append(a)
615 conf['TARGET_SPEC'] = a
619 conf['debug'] = debug
621 # get any var=val assignments
626 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
642 debug_out = getv(conf, 'debug_out', sys.stderr)
645 print >> debug_out, "config:"
646 print >> debug_out, "psyco=", have_psyco
647 for k,v in conf.items():
648 print >> debug_out, "%s=%s" % (k,v)
651 if a.endswith('.meta'):
653 elif a.startswith('@'):
654 # there can actually be multiple files per line
655 for line in open(a[1:]):
656 for f in line.strip().split():
662 print m.src_dirdeps('\nsrc:')
664 dpdeps = getv(conf, 'DPDEPS')
666 m.file_depends(open(dpdeps, 'wb'))
670 if __name__ == '__main__':
674 # yes, this goes to stdout
675 print "ERROR: ", sys.exc_info()[1]