#!/bin/env python

# Cleans CSV files with line-breaks in the middle of text fields
# - Assumes all fields surrounded with ""

import os
import sys

def output_path_for(path):
    """Return the path the cleaned copy of *path* is written to.

    The output sits next to the input and is named ``<prefix>-fixed.csv``.
    The extension is taken from the final path component only, so inputs
    such as ``./data.csv`` or ``my.data.csv`` are handled correctly.

    Raises:
        ValueError: if *path* has no extension, or the extension is not
            ``.csv``. The exception message is the text shown to the user.
    """
    prefix, extension = os.path.splitext(path)
    if not extension:
        raise ValueError("Invalid filename!")
    if extension != ".csv":
        raise ValueError("Input file should be xxx.csv!")
    return "%s-fixed.csv" % prefix


def join_broken_lines(raw_lines):
    """Merge physical lines that were split in the middle of a quoted field.

    Every complete record is assumed to end with a double quote (all fields
    are surrounded with ""). A stripped line that does not end with ``"`` is
    treated as the first part of a record and is concatenated, without any
    separator, with the following stripped line(s) until the result ends
    with ``"``.

    Args:
        raw_lines: iterable of strings (e.g. an open text file).

    Returns:
        A list of reassembled records without line terminators.
    """
    records = []
    pending = ""
    for raw in raw_lines:
        line = pending + raw.strip()
        if line.endswith('"'):
            records.append(line)
            pending = ""
        else:
            # Line-break in the middle of a text field: keep accumulating.
            pending = line
    if pending:
        # The input ended inside an unterminated record; keep the data
        # rather than silently dropping it.
        records.append(pending)
    return records


def main(argv):
    """Clean the CSV file named in ``argv[1]``.

    Args:
        argv: the full argument vector, with the script name at index 0.

    Returns:
        The process exit status: 0 on success, 2 on a usage error, an
        invalid filename, or an input file that cannot be opened.
    """
    # argv[0] is always the script itself, whether it is started as
    # "python scriptname xxx.csv" or "./scriptname xxx.csv".
    if len(argv) < 2:
        print("Specify CSV file as argument: python cleanCSV.py myfile.csv")
        return 2
    source = argv[1]

    try:
        target = output_path_for(source)
    except ValueError as err:
        print(str(err))
        return 2

    try:
        source_file = open(source, "r")
    except IOError:
        print("Cannot open file!")
        return 2
    with source_file:
        records = join_broken_lines(source_file)

    with open(target, "w") as target_file:
        target_file.write("\n".join(records))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
