Python Pandas Cheatsheet

Remove rows where a column contains a specific string

# Remove rows whose 'row_name' column contains the literal string 'meow'.
# na=False keeps rows with missing values (they do not contain 'meow');
# regex=False matches the text literally instead of as a regular expression.
df = df[~df['row_name'].str.contains('meow', regex=False, na=False)]

Sum of multiple columns

# Add columns 'a' and 'b' element-wise and store the result in a new column named 'added_col'
df['added_col'] = df['a'].add(df['b'])

Progress bar for writing out CSV file

# Save a csv file for the given dataframe 'df' and output path 'loc'
# Same output as: df.to_csv(loc, index=False)
def to_csv_with_progress_bar(df, loc, n_chunks=100):
    """Write ``df`` to a CSV file at ``loc`` in chunks, showing a progress bar.

    The rows are written in order without the index, so the resulting file
    is intended to match ``df.to_csv(loc, index=False)``.

    Args:
        df: DataFrame to save.
        loc: Output path passed to ``DataFrame.to_csv``.
        n_chunks: Number of pieces the rows are split into; the progress bar
            advances once per piece. Must be at least 1.

    Raises:
        ValueError: If ``n_chunks`` is not positive (from ``np.array_split``).
    """
    print('Saving to {} ...'.format(loc))
    # Split row *positions*, not index labels: selecting by label with
    # ``.loc`` returns every row sharing that label, so a frame with a
    # duplicated index (e.g. after ``pd.concat``) would be written with
    # repeated rows.
    row_positions = np.array_split(np.arange(len(df)), n_chunks)
    for chunk_no, positions in enumerate(tqdm(row_positions)):
        is_first = chunk_no == 0
        # Only the first chunk creates/truncates the file and emits the
        # header; every later chunk appends data rows only.
        df.iloc[positions].to_csv(
            loc,
            mode='w' if is_first else 'a',
            index=False,
            header=is_first,
        )

Leave a Reply

Your email address will not be published. Required fields are marked *