#!/bin/env python

# Splits CSV files into smaller segments for more reliable importing

import os
import sys

# Number of rows per file
size = 10000


def split_csv(source, prefix, rows_per_file=size):
    """Copy the rows of an open CSV file into numbered segment files.

    The first line of *source* is treated as the header and is repeated at
    the top of every segment.  Segments are named ``<prefix>-1.csv``,
    ``<prefix>-2.csv``, ... and each holds at most *rows_per_file* rows.

    Rows are physical lines: a quoted field containing a newline is not
    recognised and may be split across two segments.

    Args:
        source: Iterable of lines, normally a file opened in binary mode.
        prefix: Path prefix (input path without its extension) for outputs.
        rows_per_file: Maximum number of data rows per segment.

    Returns:
        The list of segment paths written, in order.  It is empty when
        *source* has no rows after the header (or no header at all).

    Raises:
        ValueError: If *rows_per_file* is smaller than 1.
    """
    if rows_per_file < 1:
        raise ValueError("rows_per_file must be at least 1")

    lines = iter(source)
    header = next(lines, None)
    written = []
    if header is None:
        # Completely empty input: nothing to split.
        return written

    output = None
    try:
        for index, line in enumerate(lines):
            if index % rows_per_file == 0:
                # A new segment starts only once a row exists for it, so an
                # exact multiple of rows_per_file leaves no header-only file.
                if output is not None:
                    output.close()
                name = "%s-%s.csv" % (prefix, len(written) + 1)
                output = open(name, "wb")
                written.append(name)
                output.write(header)
            output.write(line)
    finally:
        # Close the last segment even if a write failed part-way through.
        if output is not None:
            output.close()
    return written


def main(argv=None):
    """Split the CSV file named on the command line; return the exit status.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 2 for a missing argument, a filename that does not
        end in ``.csv``, or an input file that cannot be opened.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print("Specify CSV file as argument: python cleanCSV.py myfile.csv")
        return 2
    path = argv[1]

    # splitext only considers the last dot of the final path component, so
    # "./data.csv", "dir.v2/data.csv" and "my.data.csv" are all handled.
    prefix, extension = os.path.splitext(path)
    if not extension:
        print("Invalid filename!")
        return 2
    if extension != ".csv":
        print("Input file should be xxx.csv!")
        return 2

    try:
        # Binary mode copies bytes unchanged (line endings, encoding) on
        # both Python 2 and Python 3.
        source = open(path, "rb")
    except (IOError, OSError):
        print("Cannot open file!")
        return 2

    with source:
        split_csv(source, prefix, size)
    return 0


if __name__ == "__main__":
    sys.exit(main())
