| Function | Description | Example |
|---|---|---|
|
pd.DataFrame() |
Creates a DataFrame, a 2D labeled data structure,
from a variety of inputs like dictionaries or arrays. |
import pandas as pd
data = {'A': [1, 2], 'B': [3, 4]}
df = pd.DataFrame(data)
print(df)
|
pd.read_csv() |
Reads a CSV file and creates a DataFrame. |
df = pd.read_csv('file.csv')
print(df.head())
|
df.head() |
Displays the first n rows of the DataFrame (default is 5). |
print(df.head(3))
|
df.tail() |
Displays the last n rows of the DataFrame (default is 5). |
print(df.tail(2))
|
df.info() |
Provides a summary of the DataFrame, including column data types and non-null values. |
df.info()
|
df.describe() |
Generates descriptive statistics for numeric columns. |
print(df.describe())
|
df.isnull() |
Returns a DataFrame indicating where null values are present. |
print(df.isnull())
|
df.fillna() |
Replaces NaN values with a specified value. |
df = df.fillna(0)
print(df)
|
df.dropna() |
Removes rows or columns with NaN values. |
df = df.dropna()
print(df)
|
df.loc[] |
Accesses a group of rows and columns by labels or boolean array. |
print(df.loc[0, 'A'])
|
df.iloc[] |
Accesses rows and columns by integer index positions. |
print(df.iloc[0, 1])
|
df.sort_values() |
Sorts the DataFrame by a specified column or columns. |
df = df.sort_values(by='A')
print(df)
|
df.groupby() |
Groups DataFrame rows based on a column and allows for aggregation. |
grouped = df.groupby('A').sum()
print(grouped)
|
df.merge() |
Merges two DataFrames on a key or keys. |
df1.merge(df2, on='key')
|
pd.concat() |
Concatenates DataFrames along a specified axis. |
pd.concat([df1, df2], axis=0)
|
df.pivot() |
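Reshapes the DataFrame into a new table using the unique values of the chosen index and column fields (no aggregation is performed; duplicate index/column pairs raise an error, in which case use pivot_table() instead). |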
pivot = df.pivot(index='A', columns='B', values='C')
print(pivot)
|
df.apply() |
Applies a function along an axis (row-wise or column-wise). |
df['A'] = df['A'].apply(lambda x: x*2)
print(df)
|
df.value_counts() |
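Returns a Series with the count of each unique value (when called on a Series, as in the example) or of each unique row (when called on a DataFrame). |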
print(df['A'].value_counts())
|
df.corr() |
Computes the pairwise correlation of the numeric columns in the DataFrame (pass numeric_only=True if the DataFrame also contains non-numeric columns). |
print(df.corr())
|
pd.to_datetime() |
Converts a scalar, list, or Series (such as a column of date strings) to pandas datetime values. |
df['date'] = pd.to_datetime(df['date'])
print(df)
|
df.plot() |
Generates plots for visualizing DataFrame or Series data (requires Matplotlib; the example assumes import matplotlib.pyplot as plt has been run). |
df['A'].plot(kind='line')
plt.show()
|
df.to_csv() |
Writes a DataFrame to a CSV file. |
df.to_csv('output.csv', index=False)
|
df.drop() |
Removes specified labels from rows or columns. |
df = df.drop(columns=['A'])
print(df)
|
df.rename() |
Renames columns or indices. |
df = df.rename(columns={'A': 'New_A'})
print(df)
|
df.duplicated() |
Checks for duplicate rows in the DataFrame. |
print(df.duplicated())
|
df.nunique() |
Counts the number of unique values in each column. |
print(df.nunique())
|
pd.cut() |
Bins continuous values into discrete intervals. |
df['bins'] = pd.cut(df['A'], bins=3)
print(df)
|
pd.get_dummies() |
Converts categorical variables into dummy/indicator variables. |
df = pd.get_dummies(df, columns=['Category'])
print(df)
|
Monday, 16 December 2024
Key Pandas functions
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment