finished pandas tutorial...
This commit is contained in:
parent
83f42a9a8d
commit
9cb386f8c1
1 changed files with 80 additions and 0 deletions
80
5_pandas.py
80
5_pandas.py
|
@ -0,0 +1,80 @@
|
|||
"""
|
||||
I am not an expert on pandas and have had limited experience with it, but I'm going to cover
|
||||
the very basics of it. Pandas is similar to a database (don't kill me) but is used mainly
|
||||
for data analysis and manipulation. It mainly uses the CSV format, which is also used for spreadsheets.
|
||||
(I honestly am too lazy to write out my own examples, thank ChatGPT, but if you have any
|
||||
questions I will be able to answer them!)
|
||||
|
||||
|
||||
Key Concepts:
|
||||
**DataFrame** is a 2-dimensional table with rows and columns, again the exact same as a spreadsheet.
|
||||
**Series** is a 1-dimensional array, like a single column of data.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# 1. Creating a DataFrame from a Dictionary
# Each dictionary key becomes a column name and each list supplies that
# column's values, one entry per row (all lists must have the same length).
data = {
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [24, 27, 22],
    "City": ["New York", "Los Angeles", "Chicago"],
}
df = pd.DataFrame(data)
print("DataFrame:\n", df)
|
||||
|
||||
# 2. Selecting Columns and Rows
# Select a single column (a single label returns a Series)
print("\nNames column:\n", df["Name"])

# Select multiple columns (a list of labels returns a DataFrame)
print("\nName and Age columns:\n", df[["Name", "Age"]])

# Select a row by integer position (iloc is position-based, not label-based)
print("\nFirst row:\n", df.iloc[0])

# Select rows with a condition (the comparison builds a boolean mask)
print("\nPeople older than 23:\n", df[df["Age"] > 23])
|
||||
|
||||
# 3. Data Exploration
print("\nDataFrame Info:")
# DataFrame.info() writes its summary straight to stdout and returns None,
# so it is called on its own; wrapping it in print() would emit a stray "None".
df.info()

# describe() returns summary statistics as a new DataFrame, which is printed.
print("\nBasic Statistics:\n", df.describe())
|
||||
|
||||
# 4. Handling Missing Data
# None entries are treated by pandas as missing values.
data_with_nan = {
    "Name": ["Alice", "Bob", None],
    "Age": [24, None, 22],
    "City": ["New York", "Los Angeles", None],
}
df_nan = pd.DataFrame(data_with_nan)
print("\nDataFrame with NaN:\n", df_nan)

# Fill missing values (the same fill value is applied to every column;
# fillna returns a new DataFrame and leaves df_nan untouched)
print("\nFill NaN with 'Unknown':\n", df_nan.fillna("Unknown"))

# Drop rows with missing values (any row containing at least one missing
# value is removed; again a new DataFrame is returned)
print("\nDrop rows with NaN:\n", df_nan.dropna())
|
||||
|
||||
# 5. Adding and Modifying Columns
# Add a new column (the list must have one value per existing row)
df["Salary"] = [70000, 80000, 50000]
print("\nDataFrame with Salary:\n", df)

# Update values based on a condition: a 10% raise for everyone older than 25.
# The raise produces fractional values, so convert the integer column to float
# first; writing floats into an integer column through .loc is deprecated in
# recent pandas versions.
df["Salary"] = df["Salary"].astype(float)
df.loc[df["Age"] > 25, "Salary"] *= 1.10
print("\nUpdated Salary for Age > 25:\n", df)
|
||||
|
||||
# 6. Grouping and Aggregation
# Group by City and calculate the average Age
# (the result is a Series indexed by the distinct City values)
print("\nAverage Age by City:\n", df.groupby("City")["Age"].mean())
|
||||
|
||||
# 7. Reading from and Writing to CSV (uncomment to use)
|
||||
# Read from a CSV
|
||||
# Commented out because the file doesn't exist...
|
||||
# df_csv = pd.read_csv("path/to/file.csv")
|
||||
# print("\nData from CSV:\n", df_csv)
|
||||
|
||||
# Write to a CSV
|
||||
# Commented out because I don't want to actually save it.
|
||||
#df.to_csv("path/to/save.csv", index=False)
|
Loading…
Reference in a new issue