csv_import.py - A Simple Yet Very Practical CSV import module
I have made this simple module for converting a csv file to a list of dicts with unicode keys and values.
The module expects the CSV files to be in a directory named data, located at the same level as the module itself.
Like this: specific_importer.py, csv_import.py and data/some_data.csv
This is the code:
"""
It seems that I have a bunch of data import projects.
I have made this simple module for converting a csv file to a list of dicts with unicode keys and values.
Keys must be on the first line, unless explicit fieldnames are passed to DataReader.
"""
import csv
from csv import DictReader
import os, os.path, sys, shutil
from os.path import normpath, dirname, exists, abspath, join
from os import makedirs
# filesystem helpers
def mydir():
    """
    Return the absolute path of the directory this code is running from.

    When the module is executed as a script the location is derived from
    sys.argv[0]; when it is imported, from the module's own __file__.
    """
    running_as_script = (__name__ == '__main__')
    source = sys.argv[0] if running_as_script else __file__
    return abspath(dirname(source))
def makepath(path):
    """
    Make sure *path* exists and return it as a normalized absolute path.

    Missing directories (including intermediate ones) are created with
    os.makedirs.  A path that already exists is left untouched.

    Raises OSError when the path does not exist and cannot be created.
    """
    try:
        makedirs(path)
    except OSError:
        # Try first, check afterwards: a separate exists() test before
        # makedirs() can race with another process creating the same path.
        # Only swallow the error when the path is really there now.
        if not exists(path):
            raise
    return normpath(abspath(path))
def rmdir(path):
    """
    Delete all files and subdirectories below *path*, then *path* itself.

    The tree is walked bottom-up so every directory is already empty when
    it is removed.  Symbolic links to directories are unlinked, never
    followed or passed to os.rmdir (which would fail on them).

    A *path* that does not exist, or is not a directory, is a no-op.  If
    *path* itself is a symbolic link to a directory, the contents of the
    target are deleted but the link is left in place.
    """
    for root, dirs, files in os.walk(path, topdown=False):
        for name in files:
            os.remove(join(root, name))
        for name in dirs:
            child = join(root, name)
            if os.path.islink(child):
                # os.walk lists links to directories under `dirs`;
                # they have to be unlinked like files.
                os.remove(child)
            else:
                os.rmdir(child)
    # The walk only removes children; finish by removing the directory itself.
    if os.path.isdir(path) and not os.path.islink(path):
        os.rmdir(path)
def emptydir(path):
    """
    Leave *path* as an existing directory with nothing in it.

    Whatever is currently there is cleared with rmdir(); makepath() then
    creates the directory again if it is missing afterwards.
    """
    rmdir(path)
    makepath(path)
class DataReader(object):
    """
    Reads raw data from a csv file in the ``data`` directory and yields each
    row as a dict of primitive Python values with unicode keys and values.

    The file is looked up as ``<mydir()>/data/<fname>`` and its csv dialect is
    sniffed from the file content.

    Parameters:
        fname      -- name of the csv file inside the ``data`` directory.
        fieldnames -- optional sequence of column names; when None,
                      csv.DictReader takes them from the first line.
        encoding   -- encoding used to turn the file's bytes into text.

    The reader is an iterator (``next()`` / ``__next__()``) and can be used
    as a context manager, which closes the file on exit.  Works on Python 2.7
    and Python 3.
    """

    def __init__(self, fname, fieldnames=None, encoding='utf-8'):
        self.encoding = encoding
        self.fieldnames = fieldnames
        fpath = join(mydir(), 'data', fname)
        if sys.version_info[0] >= 3:
            # Python 3: the file object does the decoding; newline='' is
            # what the csv module asks for so embedded newlines survive.
            self.csvfile = open(fpath, 'r', encoding=encoding, newline='')
        else:
            # Python 2: csv works on byte strings; d2u() decodes them later.
            self.csvfile = open(fpath, 'r')
        try:
            dialect = self.sniffDialect()
            self.dictreader = DictReader(
                self.csvfile, fieldnames=self.fieldnames, dialect=dialect)
        except Exception:
            # Do not leak the open file when the dialect cannot be determined.
            self.csvfile.close()
            raise

    def sniffDialect(self):
        """
        Guess the csv dialect and rewind the file to its start.

        The second line is used as the sample; if it is missing or blank
        (e.g. a one-line file) the first line is used instead.
        Raises csv.Error when no delimiter can be determined.
        """
        first = self.csvfile.readline()
        second = self.csvfile.readline()
        self.csvfile.seek(0)
        sample = second if second.strip() else first
        dialect = csv.Sniffer().sniff(sample)
        dialect.skipinitialspace = True
        return dialect

    def _to_text(self, value):
        # Decode byte strings with the reader's encoding.  Lists (the extra
        # fields DictReader collects for over-long rows) are decoded item by
        # item.  Anything else -- text that is already decoded, or the None
        # DictReader fills in for short rows -- is passed through unchanged.
        if isinstance(value, bytes):
            return value.decode(self.encoding)
        if isinstance(value, list):
            return [self._to_text(item) for item in value]
        return value

    def d2u(self, d):
        """Return a copy of dict *d* with keys and values converted to unicode."""
        return {self._to_text(key): self._to_text(val) for key, val in d.items()}

    def close(self):
        """Close the underlying csv file."""
        self.csvfile.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never suppress exceptions from the with-body

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next row as a unicode dict; raises StopIteration at the end."""
        return self.d2u(next(self.dictreader))

    next = __next__  # Python 2 spelling of the iterator protocol
if __name__ == '__main__':
    # Demo: print at most N_MAX rows of data/some-file.csv.
    # A falsy N_MAX (0 or None) means "no limit".
    N_MAX = 100
    dr = DataReader('some-file.csv')
    try:
        for i, data in enumerate(dr):
            if N_MAX and i >= N_MAX:
                # Stop reading once the limit is reached instead of
                # silently consuming the rest of the file.
                break
            print(data)
    finally:
        # Close the file even when reading or decoding a row fails.
        dr.close()
    print('done')