import string
import sys
import re
import rfc822
import time

# One-off preprocessing step, disabled by default (change the guard to 1 to
# run it): splits the raw dump in the file 'hw' into separate messages and
# writes every message that is not a cancel message to '<i>.txt', numbered
# from 1.
if 0:
	src = open ('hw')
	try:
		raw = src.read ()
	finally:
		src.close ()

	# Remove the 'article <...>' / '220 0 <...> article' lines from the dump.
	raw = re.sub (r'(?m)\narticle <[^>]+>\n', '', raw)
	raw = re.sub (r'(?m)\n220 0 <[^>]+> article\n', '', raw)
	# Split on lines that consist of a single dot.
	msgs = re.split (r'(?m)\n\.\n+', raw)

	def strip (m):
		"""Return the lines of m that come after its first '220 0' line.

		Lines before the first line starting with '220 0' are dropped, as
		is every line that itself starts with '220 0'.  The result is
		joined with newlines; it is empty when no such line exists.
		"""
		kept = []
		seen = 0
		for line in m.split ('\n'):
			if re.match ('^220 0', line):
				seen = 1
				continue
			if seen:
				kept.append (line)
		return '\n'.join (kept)

	# NOTE: strip is currently unused; the call below was disabled in the
	# original script.
	# msgs = map (strip, msgs)

	# Separate messages containing 'cmsg cancel' (cs) from all others (mss).
	cs = []
	mss = []
	for msg in msgs:
		if msg.find ('cmsg cancel') == -1:
			mss.append (msg)
		else:
			cs.append (msg)

	print ('%d %d' % (len (mss), len (cs)))

	# Write each kept message to its own numbered file and close the handle
	# right away instead of leaving it to the garbage collector.
	msg_files = []
	i = 0
	for ms in mss:
		i = i + 1
		fn = '%d.txt' % i
		out = open (fn, 'w')
		try:
			out.write (ms)
		finally:
			out.close ()
		msg_files.append (fn)

# Parse the headers of the numbered message files 1.txt .. 320.txt into
# rfc822.Message objects; each one remembers its source file in .filename.
msgs = []
for n in range (1, 321):
	name = '%d.txt' % n
	fi = open (name)
	try:
		msg = rfc822.Message (fi)
	finally:
		# Only the parsed headers are used further down, so the file is
		# closed here rather than keeping 320 handles open.  The message
		# body can no longer be read through msg.fp afterwards.
		fi.close ()
	msg.filename = name
	msgs.append (msg)

# Original note (translated from Dutch): 51 posts without header.
#
# Optional dump of message timestamps, off by default.  With histo set to a
# true value, the Date header of every message is converted with
# time.mktime and the values are written to 'spam-dates', one per line;
# the filename of every message without a usable date is printed instead.
histo = 0
if histo:
	times = []
	for m in msgs:
		try:
			date = m.getdate ('Date')
		except IndexError:
			# An IndexError from getdate is treated the same as a
			# missing date.
			date = None

		if date:
			times.append ('%f' % time.mktime (date))
		else:
			print ('%s' % m.filename)

	# Close the output file explicitly so the data is flushed to disk
	# without relying on garbage collection.
	out = open ('spam-dates', 'w')
	try:
		out.write ('\n'.join (times))
	finally:
		out.close ()

# Count how many messages carry each From header value.
authors = {}
for m in msgs:
	sender = m.getheader ('From')
	authors[sender] = authors.get (sender, 0) + 1

# Report the counts as 'count : sender' lines, largest (count, sender)
# pair first.
ad = [(count, sender) for (sender, count) in authors.items ()]
ad.sort ()
ad.reverse ()
for entry in ad:
	print ('%d : %s' % entry)
	




		
		




	
