Python Pandas Cheatsheet
Remove rows where a column contains a specific string
# Remove rows whose 'row_name' value contains the string 'meow'.
# na=False treats missing values as "no match", so rows with NaN are kept
# rather than silently dropped; regex=False matches 'meow' literally instead
# of interpreting it as a regular expression.
df = df[~df['row_name'].str.contains('meow', na=False, regex=False)]
Sum of multiple columns
# Add a new column 'added_col' holding the element-wise sum of columns 'a' and 'b'
df['added_col'] = df['a'].add(df['b'])
Progress bar for writing out CSV file
# Save a csv file for the given dataframe 'df' and output path 'loc'
# Same output as: df.to_csv(loc, index=False)
def to_csv_with_progress_bar(df, loc, n_chunks=100):
    """Write ``df`` to the CSV file ``loc`` in chunks, showing a progress bar.

    The frame is cut into consecutive row ranges by position and each range
    is appended to the file in turn. The first chunk creates/overwrites the
    file and writes the header; later chunks are appended without a header.

    Requires ``tqdm`` to be available in the enclosing scope
    (e.g. ``from tqdm import tqdm``).

    Args:
        df: DataFrame to save.
        loc: Output path passed to ``DataFrame.to_csv``.
        n_chunks: Upper bound on the number of chunks (progress-bar steps).
            Defaults to 100.
    """
    print('Saving to {} ...'.format(loc))
    n_rows = len(df)
    # Never use more chunks than rows (avoids pointless empty appends), but
    # always write at least one chunk so an empty frame still gets its header.
    n_chunks = max(1, min(n_chunks, n_rows))
    # Spread the rows as evenly as possible: the first `extra` chunks get one
    # additional row each.
    base, extra = divmod(n_rows, n_chunks)
    start = 0
    for chunk in tqdm(range(n_chunks)):
        stop = start + base + (1 if chunk < extra else 0)
        is_first = chunk == 0
        # Slice by position (iloc), not by label: label lookups would write a
        # row several times when the index contains duplicate labels.
        df.iloc[start:stop].to_csv(
            loc,
            mode='w' if is_first else 'a',
            index=False,
            header=is_first,
        )
        start = stop