Monday, 16 December 2024

Key pandas functions

Function Description Example
pd.DataFrame() Creates a DataFrame, a 2D labeled data structure, from a variety of inputs like dictionaries or arrays.
import pandas as pd
data = {'A': [1, 2], 'B': [3, 4]}
df = pd.DataFrame(data)
print(df)
                
pd.read_csv() Reads a CSV file and creates a DataFrame.
df = pd.read_csv('file.csv')
print(df.head())
                
df.head() Displays the first n rows of the DataFrame (default is 5).
print(df.head(3))
                
df.tail() Displays the last n rows of the DataFrame (default is 5).
print(df.tail(2))
                
df.info() Provides a summary of the DataFrame, including column data types and non-null values.
df.info()
                
df.describe() Generates descriptive statistics for numeric columns.
print(df.describe())
                
df.isnull() Returns a DataFrame indicating where null values are present.
print(df.isnull())
                
df.fillna() Replaces NaN values with a specified value.
df = df.fillna(0)
print(df)
                
df.dropna() Removes rows or columns with NaN values.
df = df.dropna()
print(df)
                
df.loc[] Accesses a group of rows and columns by labels or boolean array.
print(df.loc[0, 'A'])
                
df.iloc[] Accesses rows and columns by integer index positions.
print(df.iloc[0, 1])
                
df.sort_values() Sorts the DataFrame by a specified column or columns.
df = df.sort_values(by='A')
print(df)
                
df.groupby() Groups DataFrame rows based on a column and allows for aggregation.
grouped = df.groupby('A').sum()
print(grouped)
                
df.merge() Merges two DataFrames on a key or keys.
df1.merge(df2, on='key')
                
pd.concat() Concatenates DataFrames along a specified axis (a top-level pandas function, not a DataFrame method).
pd.concat([df1, df2], axis=0)
                
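df.pivot() Reshapes the DataFrame using the unique values of the given index and columns, without aggregation (use df.pivot_table() when index/column pairs contain duplicates).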
pivot = df.pivot(index='A', columns='B', values='C')
print(pivot)
                
df.apply() Applies a function along an axis (row-wise or column-wise).
df['A'] = df['A'].apply(lambda x: x*2)
print(df)
                
df.value_counts() Returns a Series with the number of occurrences of each unique value (called on a single column in the example below).
print(df['A'].value_counts())
                
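df.corr() Computes the pairwise correlation of the numeric columns in the DataFrame (in pandas 2.0+, pass numeric_only=True if the DataFrame also contains non-numeric columns).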
print(df.corr())
                
pd.to_datetime() Converts a column or series to a datetime object.
df['date'] = pd.to_datetime(df['date'])
print(df)
                
df.plot() Generates plots for visualizing DataFrame or Series data (requires Matplotlib; run import matplotlib.pyplot as plt first so that plt.show() works).
df['A'].plot(kind='line')
plt.show()
                
df.to_csv() Writes a DataFrame to a CSV file.
df.to_csv('output.csv', index=False)
                
df.drop() Removes specified labels from rows or columns.
df = df.drop(columns=['A'])
print(df)
                
df.rename() Renames columns or indices.
df = df.rename(columns={'A': 'New_A'})
print(df)
                
df.duplicated() Checks for duplicate rows in the DataFrame.
print(df.duplicated())
                
df.nunique() Counts the number of unique values in each column.
print(df.nunique())
                
pd.cut() Bins continuous values into discrete intervals.
df['bins'] = pd.cut(df['A'], bins=3)
print(df)
                
pd.get_dummies() Converts categorical variables into dummy/indicator variables.
df = pd.get_dummies(df, columns=['Category'])
print(df)
                

No comments:

Post a Comment