"""
A very short, runnable tour of the pandas basics.

I am not an expert on pandas and have had limited experience with it, but I'm going to cover
the very basics of it. Pandas is similar to a database (don't kill me) but is used mainly
for data analysis and manipulation. It is mostly used with the CSV format, which is also used
for spreadsheets.
(I honestly am too lazy to write out my own examples, thank ChatGPT, but if you have any
questions I will be able to answer them!)


Key Concepts:
    **DataFrame** is a 2-dimensional table with rows and columns, again the exact same as a spreadsheet.
    **Series** is a 1-dimensional array, like a single column of data.
"""

import pandas as pd

# 1. Creating a DataFrame from a Dictionary
# Each key becomes a column name; each list holds that column's values.
data = {
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [24, 27, 22],
    "City": ["New York", "Los Angeles", "Chicago"],
}
df = pd.DataFrame(data)
print("DataFrame:\n", df)

# 2. Selecting Columns and Rows
# Select a single column (returns a Series)
print("\nNames column:\n", df["Name"])

# Select multiple columns (returns a DataFrame)
print("\nName and Age columns:\n", df[["Name", "Age"]])

# Select a row by its integer position
print("\nFirst row:\n", df.iloc[0])

# Select rows with a condition (boolean mask)
print("\nPeople older than 23:\n", df[df["Age"] > 23])

# 3. Data Exploration
print("\nDataFrame Info:")
# info() writes its report to stdout itself and returns None, so it must not
# be wrapped in print() -- that would add a stray "None" line to the output.
df.info()

print("\nBasic Statistics:\n", df.describe())

# 4. Handling Missing Data
# None in the input shows up as a missing value in the resulting DataFrame.
data_with_nan = {
    "Name": ["Alice", "Bob", None],
    "Age": [24, None, 22],
    "City": ["New York", "Los Angeles", None],
}
df_nan = pd.DataFrame(data_with_nan)
print("\nDataFrame with NaN:\n", df_nan)

# Fill missing values (returns a new DataFrame; df_nan itself is unchanged)
print("\nFill NaN with 'Unknown':\n", df_nan.fillna("Unknown"))

# Drop rows with missing values (also returns a new DataFrame)
print("\nDrop rows with NaN:\n", df_nan.dropna())

# 5. Adding and Modifying Columns
# Add a new column.
# The values are floats on purpose: the 10% raise below produces fractional
# results, and writing floats into an integer column triggers pandas'
# "incompatible dtype" FutureWarning (slated to become an error).
df["Salary"] = [70000.0, 80000.0, 50000.0]
print("\nDataFrame with Salary:\n", df)

# Update values based on a condition: 10% raise for everyone older than 25
df.loc[df["Age"] > 25, "Salary"] *= 1.10
print("\nUpdated Salary for Age > 25:\n", df)

# 6. Grouping and Aggregation
# Group by City and calculate the average Age
print("\nAverage Age by City:\n", df.groupby("City")["Age"].mean())

# 7. Reading from and Writing to CSV (uncomment to use)
# Read from a CSV
# Commented out as the file doesn't exist...
# df_csv = pd.read_csv("path/to/file.csv")
# print("\nData from CSV:\n", df_csv)

# Write to a CSV
# Commented out as I don't want to actually save it.
# df.to_csv("path/to/save.csv", index=False)