Tuesday, March 13, 2018

Write to a gzipped csv from io.TextIO

I needed to get a compressed (gzip) csv to an Amazon S3 bucket.

The Python 3 code below combines csv.writer and gzip to build the compressed CSV in memory in a single step.

Once the in-memory gzip file has been written, it is uploaded to S3 using boto3.



import io
import gzip
import csv
import boto3
import os


# Where the compressed CSV ends up: the object key used for the upload is
# destination_directory followed by destination_filename.
destination_bucket = "your-bucket"
destination_directory = "your/directory/"
destination_filename = "text.csv.gz"

# AWS credentials are taken from the environment; a missing variable raises
# KeyError here, before any data is built or uploaded.
your_access_key = os.environ["AWS_ACCESS_KEY"]
your_secret_key = os.environ["AWS_SECRET_KEY"]

# Rows to export; the first row is the CSV header.
data = [
    ['col1', 'col2', 'col3', 'col4'],
    ['data', 'data', 'three', 'four'],
    ['data', 'data', 'three', 'four'],
    ['data', 'data', 'three', 'four'],
]

# In-memory binary buffer that receives the gzip stream.
mem_file = io.BytesIO()

print("Writing data to gzipped file.")
# Open the gzip stream in text mode so csv.writer can write straight into it,
# with no intermediate StringIO copy of the whole CSV. newline='' leaves the
# csv module in charge of line endings, and the encoding is stated explicitly.
# Closing the gzip stream flushes the trailer but leaves mem_file open.
with gzip.open(mem_file, mode='wt', encoding='utf-8', newline='') as gz:
    writer = csv.writer(gz)
    writer.writerows(data)
print("Data written")

# Rewind so that a subsequent reader starts at the first compressed byte.
mem_file.seek(0)

# Create the S3 client with the explicitly supplied credentials.
s3 = boto3.client(
    's3',
    aws_access_key_id=your_access_key,
    aws_secret_access_key=your_secret_key,
)

print("Sending to S3")
# Upload the in-memory file object; the object key is the directory prefix
# followed by the file name.
s3.upload_fileobj(
    Fileobj=mem_file,
    Bucket=destination_bucket,
    Key=destination_directory + destination_filename,
)
print("Sent")


No comments:

Post a Comment