import csv
import random
from array import array
import glob

# Positions of the fields inside one vote record. Every record in this
# file is laid out as [center, table, machine, yes, no], so these names
# are used instead of bare numbers when indexing into a record.
CENTER, TABLE, MACHINE, YES, NO = range(5)

# read in the original spreadsheet converted to CSV
def read_csv_comprehensive(filename):
	"""Read the full spreadsheet export and return a flat list of records.

	Columns 3, 4 and 5 of each row are parsed as the center, table and
	machine ids; column 6 is the no-count and column 8 the yes-count.

	Args:
		filename: path of the CSV file to read.

	Returns:
		A list of [center, table, machine, yes, no] lists of ints, one per
		non-empty row, in file order.

	Raises:
		ValueError: if one of the parsed columns is not an integer.
		IndexError: if a non-empty row has fewer than nine columns.
	"""
	data = []
	# The with-block closes the file even when a row fails to parse.
	with open(filename) as source:
		for row in csv.reader(source):
			if not row:
				# csv.reader yields [] for a blank line; nothing to parse.
				continue
			center = int(row[3])
			table = int(row[4])
			machine = int(row[5])
			no = int(row[6])
			yes = int(row[8])
			data.append([center, table, machine, yes, no])
	return data


# read in a data file in my simple format making a dictionary keyed off the center id
def read_csv_simple(filename):
	"""Read a simple-format CSV file and group its records by center id.

	Each row holds center, table, machine, yes and no in columns 0-4;
	any further columns are ignored.

	Args:
		filename: path of the CSV file to read.

	Returns:
		A dict mapping each center id to the list of
		[center, table, machine, yes, no] int lists read for that center,
		in file order.

	Raises:
		ValueError: if one of the five columns is not an integer.
		IndexError: if a non-empty row has fewer than five columns.
	"""
	data = {}
	# The with-block closes the file even when a row fails to parse.
	with open(filename) as source:
		for row in csv.reader(source):
			if not row:
				# csv.reader yields [] for a blank line; nothing to parse.
				continue
			center = int(row[0])
			table = int(row[1])
			machine = int(row[2])
			yes = int(row[3])
			no = int(row[4])
			# setdefault creates the per-center list on first sight of a center
			data.setdefault(center, []).append([center, table, machine, yes, no])
	return data

# write out a data file in my simple format (data must be flat)
def write_csv_simple(filename, data):
	"""Write `data` to `filename` as CSV, one record per line.

	Args:
		filename: path of the file to create or overwrite.
		data: a flat iterable of records (each record an iterable of
			fields), not the per-center dictionary.
	"""
	# The with-block guarantees the output is flushed and closed on return.
	with open(filename, "w") as sink:
		csv.writer(sink).writerows(data)


# return a list of the voting center ids
def create_center_set(data):
	"""Return the center ids (the keys of `data`) as a new list.

	Args:
		data: dictionary keyed by center id.

	Returns:
		A list of the keys of `data`.
	"""
	# list() keeps the result a real list where keys() returns a view
	return list(data.keys())

# return the data records associated with center
def records_in_center(data, center):
	"""Look up the records stored under `center` in `data`.

	Returns the stored object itself (not a copy); a center that is not a
	key of `data` raises KeyError.
	"""
	records = data[center]
	return records
	
# return the number of yes votes in data
def yes_votes(data):
	"""Add up the YES field of every record in `data`.

	`data` is an iterable of records; an empty one gives 0.
	"""
	return sum(rec[YES] for rec in data)

# return the number of no votes in data
def no_votes(data):
	"""Add up the NO field of every record in `data`.

	`data` is an iterable of records; an empty one gives 0.
	"""
	return sum(rec[NO] for rec in data)

# return a list of the total votes on each machine in data
def machine_totals(data):
	"""Return, for each record in `data`, its yes-count plus its no-count.

	The result has one entry per record, in the same order as `data`.
	"""
	return [rec[YES] + rec[NO] for rec in data]

# return an array with Y's and N's corresponding to the number of yes and no votes
def make_deck(data):
	"""Build the pool of individual votes for the records in `data`.

	Returns a character array holding one 'Y' per yes vote followed by one
	'N' per no vote.

	NOTE: array typecode 'c' exists only in Python 2's array module.
	"""
	deck = array('c', 'Y') * yes_votes(data)
	deck.extend(array('c', 'N') * no_votes(data))
	return deck

# return a list of the votes assigned to each voting machine
def deal(deck,machines):
	"""Cut `deck` into consecutive slices, one per entry of `machines`.

	Args:
		deck: any sliceable sequence of votes.
		machines: iterable of slice sizes, one per machine.

	Returns:
		A list of slices of `deck`; slice k starts where slice k-1 ended
		and is `machines[k]` items long (shorter if the deck runs out).
	"""
	hands = []
	start = 0
	for size in machines:
		stop = start + size
		hands.append(deck[start:stop])
		start = stop
	return hands

# simulate one voting center
def simulate_center(data, center):
	"""Randomly redistribute one center's votes over its machines.

	The yes and no votes of every record stored under `center` are pooled
	into one deck, shuffled with random.shuffle, and dealt back so that
	each record receives as many votes as it originally had in total.

	Returns:
		A list of [center, table, machine, yes, no] lists, one per
		original record and in the same order, with the yes and no counts
		taken from the shuffled deal.
	"""
	records = records_in_center(data, center)
	deck = make_deck(records)
	random.shuffle(deck)
	hands = deal(deck, machine_totals(records))
	simulated = []
	# deal() returns exactly one hand per record, so the two lists pair up
	for rec, hand in zip(records, hands):
		yes = hand.count('Y')
		no = hand.count('N')
		simulated.append([rec[CENTER], rec[TABLE], rec[MACHINE], yes, no])
	return simulated

# simulate an entire election using the data and the list of center ids
def simulate_election(data, centers):
	"""Simulate every center in `centers` and return one flat record list.

	The simulated records of each center are appended in the order in
	which `centers` is iterated.
	"""
	results = []
	for center in centers:
		results.extend(simulate_center(data, center))
	return results

# run n full simulations
def run_simulations(n, input_filename="votes-simple.csv", output_prefix="votes-random-"):
	"""Run `n` simulated elections and write each one to its own CSV file.

	Args:
		n: number of simulations to run.
		input_filename: simple-format CSV file holding the real results.
		output_prefix: simulation i is written to
			output_prefix + str(i) + ".csv".
	"""
	data = read_csv_simple(input_filename)
	centers = create_center_set(data)
	for i in range(n):
		# progress indicator; with a single argument the parenthesised
		# form prints the same text under Python 2 and Python 3
		print(i)
		output = simulate_election(data, centers)
		write_csv_simple(output_prefix + str(i) + ".csv", output)

# are there duplicate elements in l?
def dups(l):
	"""Return True if some value occurs more than once in `l`.

	Args:
		l: a sized collection of hashable items.

	Returns:
		True when `l` has fewer distinct values than items, else False.
	"""
	# A set keeps one copy of each value, so any shrinkage means a repeat.
	return len(set(l)) != len(l)

# find the statistic we're interested in for one file
def file_stats(filename):
	"""Count the centers in `filename` whose top yes-count is tied.

	A center is counted when the largest yes-count among its records
	appears on more than one record.

	Args:
		filename: simple-format CSV file to analyse.

	Returns:
		The number of centers that meet the condition.
	"""
	data = read_csv_simple(filename)
	count = 0
	for center in create_center_set(data):
		yes = [rec[YES] for rec in records_in_center(data, center)]
		# replace depending on statistic (rec[NO] holds the matching
		# no-counts); this computes cap-consistent precincts
		if yes.count(max(yes)) > 1:
			count += 1
	return count

# print and return stats on all files in directory
def gather_stats():
	"""Compute, print and collect the statistic for every *.csv file.

	Files are taken from the current working directory in the order
	glob.glob returns them.

	Returns:
		A list with one statistic per file, in that same order.
	"""
	stats = []
	for name in glob.glob("*.csv"):
		# NOTE(review): this used to call count_collide(), which is not
		# defined in this file; file_stats() computes the per-file
		# statistic and is assumed to be the intended call -- confirm.
		stat = file_stats(name)
		print(stat)
		stats.append(stat)
	return stats

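# "regreso a documentos" ("back to documents") -- stray navigation text, presumably left over from the page this file was copied from; kept as a comment so the module parses.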