#!/usr/bin/env python
"""Fix up <linux/gfp.h> / <linux/slab.h> includes in git-tracked C files.

For every file reported by ``git ls-files`` (default pattern ``*.c``) the
script scans the source for identifiers matching the patterns in ``targets``
and compares that against the target headers the file already includes.  It
then reports, and with ``--commit`` applies, the insertion of the missing
header and the deletion of target headers that are not the one selected.

Options:
    -c, --commit   rewrite the files in place instead of only reporting
    -d, --debug    increase debug verbosity (may be given more than once)
"""

import re
import subprocess
import sys
import getopt

# Comments that start and end on the same line, and C++ style comments.
line_comment_re = re.compile(r'//.*|/\*.*?\*/')
# A block comment that opens on this line; group 1 is the code before it.
comment_open_re = re.compile(r'(.*)/\*.*')
# A block comment that closes on this line; group 1 is the code after it.
comment_close_re = re.compile(r'.*?\*/(.*)')
# An #include directive; group 1 is the (still bracketed/quoted) header.
inc_re = re.compile(r'#include\s*(\S*).*')
# An angle-bracket include of a linux/ header; group 1 is the header path.
ginc_re = re.compile(r'<(linux/\S*)>')
# Headers that raise the score of the include group they appear in.
mainh_re = re.compile(r'linux/(bitmap|bootmem|bug|cpu|err|errno|file|fs|interrupt|irq|kernel|list|log2|mm|module|mutex|net|pfn|sched|spinlock|vmalloc|workqueue)\.h')

# (header, usage pattern) pairs.  When several are needed only the one with
# the highest index is kept/inserted by process_file().
targets = (('linux/gfp.h',
            re.compile(r'^(|.*[^a-zA-Z0-9_])_*(GFP_|alloc_page|get_free_page|get_zeroed_page|get_dma_pages|free_page|free_hot_cold_page|drain_(zone|all|local)_pages|(set|clear)_gfp_allowed_mask)')),
           ('linux/slab.h',
            re.compile(r'^(|.*[^a-zA-Z0-9_])_*(slab_is_available|kmem_cache_|k[mzc]alloc|krealloc|kz?free|ksize|__getname|putname)')))

# Debug verbosity; incremented once per -d/--debug option.
dbg = 0


# The helpers below call print() with a single parenthesised argument, which
# behaves identically as a Python 2 statement and a Python 3 function call.

def pdd(l):
    """Print *l* as a debug message when verbosity is above 1."""
    if dbg > 1:
        print('DBG ' + l)


def pd(l):
    """Print *l* as a debug message when any debug verbosity is set."""
    if dbg:
        print('DBG ' + l)


def pe(l):
    """Print *l* as an error message."""
    print('ERR ' + l)


def pi(l):
    """Print *l* as an informational message."""
    print('INF ' + l)


def determine_ins_pos(ins, start, end, c):
    """Return the line index at which include line *ins* should be inserted.

    *c* is the list of file lines and ``c[start:end]`` is the include group
    chosen for the insertion.  The existing order of that group is compared
    against three candidate orderings (alphabetical, increasing line length,
    decreasing line length) by a least-squares line fit; the ordering whose
    slope is closest to 1 wins.  If no ordering fits within a slope error of
    0.5 (including groups of a single line) the result is *end*, i.e. the
    new line is appended to the group.
    """
    #
    # incs is original order, orders[0] is sorted alphabetically, 1
    # christmas tree, 2 reverse christmas tree.  orders also include
    # @ins in them
    #
    incs = c[start:end]
    src = incs + [ins]
    # key=len is a stable sort, giving the same order as the former
    # cmp=lambda x, y: len(x) - len(y) while also working on Python 3.
    by_length = sorted(src, key=len)
    orders = [sorted(src), by_length, by_length[::-1]]

    #
    # Build reverse lookup table to map include string to the
    # position.  The positions of @ins are recorded separately and
    # excluded from reverse maps.
    #
    lookup_tbls = [{} for _ in orders]
    ins_xs = [None] * len(orders)
    for i, order in enumerate(orders):
        adj = 0
        for j, inc in enumerate(order):
            if inc == ins:
                ins_xs[i] = j
                # entries after @ins shift down by one once it is excluded
                adj = -1
            else:
                lookup_tbls[i][inc] = j + adj

    # and map incs to position numbers for each order
    seqs = [[tbl[inc] for inc in incs] for tbl in lookup_tbls]

    #
    # Fit Y = mX + c and choose the one which is closest to m == 1
    #
    # S is sigma, _X_ and _Y_ are respectively means of Xi's and Yi's.
    #
    # m = S (Xi - _X_)Yi / S (Xi - _X_)^2
    # c = _Y_ - m_X_
    #
    best_delta = 9999
    best_o = best_m = best_c = -1

    avg_x = avg_y = (len(incs) - 1) / 2.0
    denom = sum((i - avg_x) ** 2 for i in range(len(incs)))

    # denom is zero for a single-line group: nothing to fit against
    if denom != 0:
        for o, seq in enumerate(seqs):
            numer = sum((i - avg_x) * y for i, y in enumerate(seq))
            slope = numer / denom
            intercept = avg_y - slope * avg_x
            delta = abs(slope - 1)
            if delta < best_delta:
                best_o = o
                best_m = slope
                best_c = intercept
                best_delta = delta

    if best_delta <= 0.5:
        # int() keeps the result usable as a line index on Python 2, where
        # round() returns a float.
        pos = start + int(round(best_m * ins_xs[best_o] + best_c))
        pd("ins order=%d m=%f c=%f delta=%f pos=%d" %
           (best_o, best_m, best_c, best_delta, pos))
    else:
        pos = end
        pd("ins order=- m=%f c=%f delta=%f pos=%d" %
           (best_m, best_c, best_delta, pos))

    return pos


def process_file(f, commit):
    """Analyse the open file *f* and optionally fix its target includes.

    *f* must provide ``name``, ``read()`` and, when *commit* is true,
    ``seek()``, ``truncate()`` and ``write()``.

    Returns -1 if an edit is needed but cannot be made (a target header is
    included more than once, or there is no linux/ include group to insert
    into), 1 if the file was rewritten, and 0 otherwise (nothing to do, or
    *commit* is false).
    """
    pd("** Processing %s" % f.name)

    # line number of each target's include, or -1 when not included
    have_targets = [-1] * len(targets)
    # whether the body uses something matching each target's pattern
    need_targets = [False] * len(targets)
    dup_incs = False

    in_comment = False
    best_inc_score = -1
    best_inc_start = best_inc_end = -1
    inc_score = -1
    inc_start = inc_end = -1

    c = f.read().splitlines(False)

    for lineno, l in enumerate(c):
        # strip comments so that commented-out code is not analysed
        if in_comment:
            m = comment_close_re.match(l)
            if m:
                l = m.group(1)
                in_comment = False
            else:
                l = ''
        if not in_comment:
            l = line_comment_re.sub('', l)
            m = comment_open_re.match(l)
            if m:
                l = m.group(1)
                in_comment = True

        # see whether insertion is necessary and locate the best include group
        ginc = None
        m = inc_re.match(l)
        if m:
            m = ginc_re.match(m.group(1))
            if m:
                ginc = m.group(1)

        if ginc:
            if inc_start < 0:
                inc_start = lineno
                inc_score = 0
            if mainh_re.match(ginc):
                inc_score += 1
            for i, (tinc, tre) in enumerate(targets):
                if tinc == ginc:
                    if have_targets[i] >= 0:
                        dup_incs = True
                    else:
                        have_targets[i] = lineno
        else:
            # a non-include line terminates the current include group
            if inc_start >= 0:
                inc_end = lineno
                pdd('cand_inc: %d-%d %d' % (inc_start, inc_end, inc_score))
                if inc_score > best_inc_score:
                    best_inc_score = inc_score
                    best_inc_start = inc_start
                    best_inc_end = inc_end
                inc_start = -1

            # NOTE(review): the original indentation was lost; this usage
            # scan is assumed to apply only to lines that are not linux/
            # includes -- confirm against the upstream script.
            for i, (tinc, tre) in enumerate(targets):
                if tre.search(l):
                    pdd('%5d r%d: %s' % (lineno, i, l))
                    need_targets[i] = True

    pd('need: %s have: %s best: %s-%s %s' %
       (need_targets, have_targets, best_inc_start, best_inc_end,
        best_inc_score))

    # only the highest-index needed target is kept; -1 means none is needed
    need_high = -1
    for i, needed in enumerate(need_targets):
        if needed:
            need_high = i

    need_del = []
    need_ins = None
    for i, (tinc, tre) in enumerate(targets):
        if i == need_high:
            if have_targets[i] < 0:
                need_ins = '#include <' + tinc + '>'
        elif have_targets[i] >= 0:
            need_del.append(have_targets[i])

    pd('need_del: %s need_ins: %s' % (need_del, need_ins))

    no_group = bool(need_ins) and best_inc_start < 0
    if ((need_ins or need_del) and dup_incs) or no_group:
        reason = ''
        if dup_incs:
            reason += 'dup'
        if no_group:
            if reason:
                reason += '/'
            reason += 'missing'
        pe('%s: can\'t edit, %s includes, '
           'ins: %s del: %s' % (f.name, reason, need_ins, need_del))
        return -1

    if not (need_ins or need_del):
        return 0

    ins_pos = -1
    if need_ins:
        ins_pos = determine_ins_pos(need_ins, best_inc_start,
                                    best_inc_end, c)

    pi('%s: delete %s insert @%d "%s"' %
       (f.name, need_del, ins_pos, need_ins))

    if not commit:
        return 0

    # apply edits: rewrite the whole file from the in-memory lines
    pd('editing %s' % f.name)
    f.seek(0)
    f.truncate(0)
    for lineno, line in enumerate(c):
        if lineno == ins_pos:
            f.write(need_ins + '\n')
        if lineno not in need_del:
            f.write(line + '\n')

    return 1


def main():
    """Parse the command line and process every file git lists.

    Returns 1 when the options cannot be parsed and None otherwise.
    """
    global dbg
    commit = False
    mode = 'r'

    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(sys.argv[1:], 'cd',
                                       ['commit', 'debug'])
    except getopt.GetoptError as e:
        pe(str(e))
        return 1

    for o, a in opts:
        if o in ('-c', '--commit'):
            commit = True
            mode = 'r+'
        if o in ('-d', '--debug'):
            dbg += 1

    if not args:
        args = ["*.c"]

    # universal_newlines=True yields text lines on both Python 2 and 3
    p = subprocess.Popen(["git", "ls-files"] + args,
                         stdout=subprocess.PIPE, universal_newlines=True)
    try:
        for path in iter(p.stdout.readline, ''):
            # close each file promptly instead of leaking the handle
            with open(path.strip(), mode) as f:
                process_file(f, commit)
    finally:
        p.stdout.close()
        p.wait()


if __name__ == '__main__':
    sys.exit(main())