1 #!/bin/sh 2 """"exec ${PYTHON:-python} -t $0 "$@";" """ 3 # vim: filetype=python expandtab smarttab shiftwidth=4 4 5 6 import sys 7 import string 8 from operator import itemgetter as nth 9 10 11 alphanumerics = string.lowercase + string.digits 12 13 def emit_top_new(paragraph, show_count, previous, keep_previous_count): 14 """Given some text and a list of disallowed characters, return two samples 15 of the most common letters. One sample is likely to be fed back in as a 16 disallowed list. 17 18 >>> emit_top_new("", 3, ['1', '2'], 2) 19 ([], []) 20 >>> emit_top_new("abcdefghighighijkljklmnopppp", 3, [], 2) 21 (['p', 'h', 'i'], ['p', 'h']) 22 >>> emit_top_new("abcdefghighighijkljklmnopppp", 3, ['p', 'h'], 2) 23 (['i', 'g', 'l'], ['i', 'g']) 24 """ 25 character_count = dict([c, 0] for c in alphanumerics) 26 27 for c in previous: 28 character_count.pop(c, None) 29 30 for c in paragraph.lower(): 31 try: 32 character_count[c] += 1 33 except KeyError: 34 pass # not a character we care about 35 36 sortable = character_count.items() 37 38 sortable.sort(key=nth(1)) 39 sortable.reverse() 40 sortable = [k for k, v in sortable if v > 0] 41 42 return sortable[:show_count], sortable[:keep_previous_count] 43 44 45 if __name__ == "__main__": 46 import doctest 47 doctest.testmod() 48 49 show_count = 10 50 keep_previous_count = 5 51 previous = set() 52 53 lines = [] 54 for line in sys.stdin: 55 if line.strip() == "": 56 if lines: 57 new, previous = emit_top_new("".join(lines), show_count, previous, keep_previous_count) 58 print new 59 lines = [] 60 else: 61 lines.append(line) 62 63 if lines: 64 new, previous = emit_top_new("".join(lines), show_count, previous, keep_previous_count) 65 print new