finished pandas tutorial...

2024-11-11 22:30:06 +00:00 · 2024-11-11 22:30:06 +00:00 · 9cb386f8c1
commit 9cb386f8c1
parent 83f42a9a8d
1 changed files with 80 additions and 0 deletions
--- a/5_pandas.py
+++ b/5_pandas.py
@ -0,0 +1,80 @@
+"""
+I am not an expert on pandas and have had limited experience with it, but I'm going to cover
+the very basics of it. pandas is similar to a database (don't kill me) but is used mainly
+for data analysis and manipulation. It mainly works with the CSV format, which is also used for spreadsheets.
+(I honestly am too lazy to write out my own examples, thank ChatGPT, but if you have any
+questions I will be able to answer them!)
+
+
+Key Concepts:
+    **DataFrame** is a 2-dimensional table with rows and columns, again the exact same as a spreadsheet.
+    **Series** is a 1-dimensional array, like a single column of data.
+"""
+
+import pandas as pd
+
+# 1. Creating a DataFrame from a Dictionary
+# Each key becomes a column label and each list supplies that column's values,
+# so all three lists must have the same length (one entry per row).
+data = dict(
+    Name=["Alice", "Bob", "Charlie"],
+    Age=[24, 27, 22],
+    City=["New York", "Los Angeles", "Chicago"],
+)
+df = pd.DataFrame(data=data)
+print("DataFrame:\n", df)
+
+# 2. Selecting Columns and Rows
+# A single label inside the brackets returns that one column as a Series
+names = df["Name"]
+print("\nNames column:\n", names)
+
+# A list of labels returns a DataFrame holding only those columns
+name_and_age = df[["Name", "Age"]]
+print("\nName and Age columns:\n", name_and_age)
+
+# iloc selects by integer position, so position 0 is the first row
+first_row = df.iloc[0]
+print("\nFirst row:\n", first_row)
+
+# A boolean mask keeps only the rows where the condition is True
+older_than_23 = df["Age"] > 23
+print("\nPeople older than 23:\n", df[older_than_23])
+
+# 3. Data Exploration
+print("\nDataFrame Info:")
+# info() writes its summary (column names, non-null counts, dtypes) directly to
+# stdout and returns None, so it is called on its own; wrapping it in print()
+# would add a stray "None" line after the summary.
+df.info()
+
+# describe() returns a new DataFrame of summary statistics for the numeric columns
+print("\nBasic Statistics:\n", df.describe())
+
+# 4. Handling Missing Data
+# None marks a missing entry in each column
+data_with_nan = dict(
+    Name=["Alice", "Bob", None],
+    Age=[24, None, 22],
+    City=["New York", "Los Angeles", None],
+)
+df_nan = pd.DataFrame(data_with_nan)
+print("\nDataFrame with NaN:\n", df_nan)
+
+# fillna returns a copy with every missing entry replaced; df_nan is untouched
+filled = df_nan.fillna(value="Unknown")
+print("\nFill NaN with 'Unknown':\n", filled)
+
+# dropna returns a copy without any row that has at least one missing entry
+complete_rows = df_nan.dropna()
+print("\nDrop rows with NaN:\n", complete_rows)
+
+# 5. Adding and Modifying Columns
+# Add a new column (one value per existing row)
+df["Salary"] = [70000, 80000, 50000]
+print("\nDataFrame with Salary:\n", df)
+
+# Update values based on a condition
+# Salary was created from ints, so convert it to float first: a 10% raise
+# produces fractional values, and writing floats into an integer column through
+# .loc is deprecated since pandas 2.1 (FutureWarning) and slated to become an error.
+df["Salary"] = df["Salary"].astype(float)
+df.loc[df["Age"] > 25, "Salary"] *= 1.10
+print("\nUpdated Salary for Age > 25:\n", df)
+
+# 6. Grouping and Aggregation
+# Split the rows by City, then take the mean of Age within each group
+average_age_by_city = df.groupby("City")["Age"].mean()
+print("\nAverage Age by City:\n", average_age_by_city)
+
+# 7. Reading from and Writing to CSV (uncomment to use)
+# Read from a CSV
+# Commented out because the file doesn't exist...
+# df_csv = pd.read_csv("path/to/file.csv")
+# print("\nData from CSV:\n", df_csv)
+
+# Write to a CSV
+# Commented out because I don't want to actually save it.
+#df.to_csv("path/to/save.csv", index=False)