这份备忘单源于作者重回 Python 项目时的实际需要:快速回顾 Python 的最新特性、最佳实践和最重要的工具。它旨在总结 Python 中最实用、最核心的知识点,以解决 80% 的日常编程需求。
文章内容按逻辑区域划分,方便读者快速查找特定任务或主题。
文件操作 (File Operations)
本节涵盖了 Python 中最常见的文件读写、管理和操作方法。
- 读取文件
with open('example.txt', 'r') as file: content = file.read() print(content)
- 写入文件
with open('example.txt', 'w') as file: file.write('Hello, Python!')
- 追加内容到文件
with open('example.txt', 'a') as file: file.write('\nAppend this line.')
- 将行读取到列表中
with open('example.txt', 'r') as file: lines = file.readlines() print(lines)
- 逐行遍历文件
with open('example.txt', 'r') as file: for line in file: print(line.strip())
- 检查文件是否存在
import os if os.path.exists('example.txt'): print('File exists.') else: print('File does not exist.')
- 将列表写入文件
lines = ['First line', 'Second line', 'Third line'] with open('example.txt', 'w') as file: for line in lines: file.write(f'{line}\n')
- 使用 with 块处理多个文件
with open('source.txt', 'r') as source, open('destination.txt', 'w') as destination: content = source.read() destination.write(content)
- 删除文件
import os

# Attempt the removal directly instead of checking for existence first:
# a separate exists() check leaves a window in which another process can
# delete the file, turning the "safe" branch into an unhandled error.
try:
    os.remove('example.txt')
except FileNotFoundError:
    print('File does not exist.')
else:
    print('File deleted.')
- 读写二进制文件
# Reading a binary file with open('image.jpg', 'rb') as file: content = file.read() # Writing to a binary file with open('copy.jpg', 'wb') as file: file.write(content)
简单的 HTTP API 操作 (HTTP API Operations)
利用 requests 库进行 HTTP 请求是 Python Web 开发和数据抓取中的核心部分。
- 基本 GET 请求
import requests response = requests.get('https://api.example.com/data') data = response.json() # Assuming the response is JSON print(data)
- 带查询参数的 GET 请求
import requests params = {'key1': 'value1', 'key2': 'value2'} response = requests.get('https://api.example.com/search', params=params) data = response.json() print(data)
- 处理 HTTP 错误
import requests response = requests.get('https://api.example.com/data') try: response.raise_for_status() # Raises an HTTPError if the status is 4xx, 5xx data = response.json() print(data) except requests.exceptions.HTTPError as err: print(f'HTTP Error: {err}')
- 设置请求超时
import requests try: response = requests.get('https://api.example.com/data', timeout=5) # Timeout in seconds data = response.json() print(data) except requests.exceptions.Timeout: print('The request timed out')
- 请求中使用 Header
import requests headers = {'Authorization': 'Bearer YOUR_ACCESS_TOKEN'} response = requests.get('https://api.example.com/protected', headers=headers) data = response.json() print(data)
- 带 JSON Payload 的 POST 请求
import requests payload = {'key1': 'value1', 'key2': 'value2'} headers = {'Content-Type': 'application/json'} response = requests.post('https://api.example.com/submit', json=payload, headers=headers) print(response.json())
- 处理响应编码
import requests response = requests.get('https://api.example.com/data') response.encoding = 'utf-8' # Set encoding to match the expected response format data = response.text print(data)
- 使用 Session 进行请求
import requests with requests.Session() as session: session.headers.update({'Authorization': 'Bearer YOUR_ACCESS_TOKEN'}) response = session.get('https://api.example.com/data') print(response.json())
- 处理重定向
import requests response = requests.get('https://api.example.com/data', allow_redirects=False) print(response.status_code)
- 流式传输大型响应
import requests

# With stream=True the connection stays open until the body is fully read or
# the response is closed. Using the response as a context manager guarantees
# the connection is released even if processing stops early or raises.
with requests.get('https://api.example.com/large-data', stream=True) as response:
    for chunk in response.iter_content(chunk_size=1024):
        # Replace this placeholder with the real handler, e.g. process(chunk)
        pass
列表操作 (List Operations)
列表是 Python 最常用的数据结构之一,掌握其操作至关重要。
- 创建列表
elements = ['Earth', 'Air', 'Fire', 'Water']
- 向列表追加元素
elements.append('Aether')
- 向列表插入元素
elements.insert(1, 'Spirit') # Insert 'Spirit' at index 1
- 从列表中移除元素
elements.remove('Earth') # Removes the first occurrence of 'Earth'
- 弹出列表元素
last_element = elements.pop() # Removes and returns the last element
- 查找元素索引
index_of_air = elements.index('Air')
- 列表切片 (Slicing)
sub_elements = elements[1:4] # Get elements from index 1 to 3
- 列表推导式 (List Comprehension)
lengths = [len(element) for element in elements] # Create a new list with lengths of each element
- 排序列表
elements.sort()
- 反转列表
elements.reverse()
字典操作 (Dictionary Operations)
字典是 Python 中用于存储键值对的强大数据结构。
- 创建字典
elements = {'Hydrogen': 'H', 'Helium': 'He', 'Lithium': 'Li'}
- 添加或更新条目
elements['Carbon'] = 'C' # Adds 'Carbon' or updates its value to 'C'
- 移除条目
del elements['Lithium'] # Removes the key 'Lithium' and its value
- 检查键是否存在
if 'Helium' in elements: print('Helium is present')
- 迭代键 (Keys)
for element in elements: print(element) # Prints each key
- 迭代值 (Values)
for symbol in elements.values(): print(symbol) # Prints each value
- 迭代项目 (Items)
for element, symbol in elements.items(): print(f'{element}: {symbol}')
- 字典推导式 (Dictionary Comprehension)
squares = {x: x**2 for x in range(5)} # Squares of numbers from 0 to 4
- 合并字典
alchemists = {'Paracelsus': 'Mercury'} philosophers = {'Plato': 'Aether'} merged = {**alchemists, **philosophers} # Python 3.5+
- 带默认值获取值
element = elements.get('Neon', 'Unknown') # Returns 'Unknown' if 'Neon' is not found
操作系统操作 (Operating System Operations)
os 和 shutil 模块提供了与操作系统交互的强大功能。
- 文件路径导航
import os # Craft a path compatible with the underlying OS path = os.path.join('mystic', 'forest', 'artifact.txt') # Retrieve the tome's directory directory = os.path.dirname(path) # Unveil the artifact's name artifact_name = os.path.basename(path)
- 列出目录内容
import os contents = os.listdir('enchanted_grove') print(contents)
- 创建目录
import os # create a single directory os.mkdir('alchemy_lab') # create a hierarchy of directories os.makedirs('alchemy_lab/potions/elixirs')
- 删除文件和目录
import os # remove a file os.remove('unnecessary_scroll.txt') # remove an empty directory os.rmdir('abandoned_hut') # remove a directory and its contents import shutil shutil.rmtree('cursed_cavern')
- 执行 Shell 命令
import subprocess # Invoke the 'echo' incantation result = subprocess.run(['echo', 'Revealing the arcane'], capture_output=True, text=True) print(result.stdout)
- 使用环境变量
import os # Read the 'PATH' variable path = os.environ.get('PATH') # Create a new environment variable os.environ['MAGIC'] = 'Arcane'
- 更改当前工作目录
import os # Traverse to the 'arcane_library' directory os.chdir('arcane_library')
- 路径存在性和类型
import os # Check if a path exists exists = os.path.exists('mysterious_ruins') # Ascertain if the path is a directory is_directory = os.path.isdir('mysterious_ruins') # Determine if the path is a file is_file = os.path.isfile('ancient_manuscript.txt')
- 使用临时文件
import os
import tempfile

# Create a temporary file. delete=False keeps it on disk after it is closed,
# so this code owns the cleanup: close the handle via `with`, then remove it.
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
    print(temp_file.name)
os.remove(temp_file.name)

# Create a temporary directory; leaving the `with` block deletes the
# directory and everything inside it.
temp_dir = tempfile.TemporaryDirectory()
with temp_dir:
    print(temp_dir.name)
- 获取系统信息
import os import platform # Discover the operating system os_name = os.name # 'posix', 'nt', 'java' # Unearth detailed system information system_info = platform.system() # 'Linux', 'Windows', 'Darwin'
命令行接口 (CLI) - STDIN, STDOUT, STDERR
Python 脚本经常与命令行交互,掌握标准输入输出至关重要。
- 读取用户输入
user_input = input("Impart your wisdom: ") print(f"You shared: {user_input}")
- 打印到 STDOUT (标准输出)
print("Behold, the message of the ancients!")
- 格式化打印
name = "Merlin" age = 300 print(f"{name}, of {age} years, speaks of forgotten lore.")
- 从 STDIN (标准输入) 读取行
import sys for line in sys.stdin: print(f"Echo from the void: {line.strip()}")
- 写入 STDERR (标准错误)
import sys sys.stderr.write("Beware! The path is fraught with peril.\n")
- 重定向 STDOUT
import contextlib

# redirect_stdout swaps sys.stdout for the duration of the block and restores
# the previous stream on exit, even if the body raises an exception.
with open('mystic_log.txt', 'w') as f, contextlib.redirect_stdout(f):
    print("This message is inscribed within the mystic_log.txt.")
- 重定向 STDERR
import contextlib
import sys

# redirect_stderr restores the previous sys.stderr when the block exits.
# Assigning sys.stderr = f by hand would leave it pointing at a closed file
# once the `with open(...)` block ends, breaking every later error message.
with open('warnings.txt', 'w') as f, contextlib.redirect_stderr(f):
    print("This warning is sealed within warnings.txt.", file=sys.stderr)
- 提示密码
import getpass secret_spell = getpass.getpass("Whisper the secret spell: ")
- 命令行参数
import sys # The script's name is the first argument, followed by those passed by the invoker # This example expects exactly two additional arguments # To run: python your_script.py arg1 arg2 # script, first_arg, second_arg = sys.argv # Uncomment and adjust if needed # print(f"Invoked with the sacred tokens: {first_arg} and {second_arg}") if len(sys.argv) > 1: print(f"命令行参数: {sys.argv[1:]}")
- 使用 Argparse 处理复杂 CLI 交互
import argparse parser = argparse.ArgumentParser(description="Invoke the ancient scripts.") parser.add_argument('spell', help="The spell to cast") parser.add_argument('--power', type=int, help="The power level of the spell") args = parser.parse_args() print(f"Casting {args.spell} with power {args.power}")
数学运算与排列组合 (Math and Combinatorics)
Python 内置了丰富的数学功能,并可通过 math 和 itertools 模块扩展。
- 基本算术运算
# The addition result is named `total` rather than `sum`, because assigning to
# `sum` would shadow the built-in sum() for the rest of the module.
total = 7 + 3       # Addition
difference = 7 - 3  # Subtraction
product = 7 * 3     # Multiplication
quotient = 7 / 3    # True division (always returns a float)
remainder = 7 % 3   # Modulus (remainder)
power = 7 ** 3      # Exponentiation
- 处理复数 (Complex Numbers)
z = complex(2, 3) # Create a complex number 2 + 3j real_part = z.real # Retrieve the real part imaginary_part = z.imag # Retrieve the imaginary part conjugate = z.conjugate() # Get the conjugate
- 数学函数 (math 模块)
import math root = math.sqrt(16) # Square root logarithm = math.log(100, 10) # Logarithm base 10 of 100 sine = math.sin(math.pi / 2) # Sine of 90 degrees (in radians)
- 生成排列 (Permutations)
from itertools import permutations paths = permutations([1, 2, 3]) # Generate all permutations of the list [1, 2, 3] for path in paths: print(path)
- 生成组合 (Combinations)
from itertools import combinations combos = combinations([1, 2, 3, 4], 2) # Generate all 2-element combinations for combo in combos: print(combo)
- 随机数生成 (random 模块)
import random num = random.randint(1, 100) # Generate a random integer between 1 and 100
- 处理分数 (Fractions)
from fractions import Fraction f = Fraction(3, 4) # Create a fraction 3/4 print(f + 1) # Add a fraction and an integer
- 统计函数 (statistics 模块)
import statistics data = [1, 2, 3, 4, 5] mean = statistics.mean(data) # Average median = statistics.median(data) # Median stdev = statistics.stdev(data) # Standard Deviation
- 三角函数 (math 模块)
import math angle_rad = math.radians(60) # Convert 60 degrees to radians cosine = math.cos(angle_rad) # Cosine of the angle
- 处理 Infinity 和 NaN
import math infinity = math.inf # Representing infinity not_a_number = math.nan # Representing a non-number (NaN)
数据库操作 (Database Operations)
本节以 psycopg2 库为例,展示 PostgreSQL 数据库的基本操作。其他数据库(如 MySQL、SQLite)的操作模式类似。
- 建立连接
import psycopg2 connection = psycopg2.connect( dbname='your_database', user='your_username', password='your_password', host='your_host' )
- 创建游标 (Cursor)
cursor = connection.cursor()
- 执行查询
cursor.execute("SELECT * FROM your_table")
- 获取查询结果
records = cursor.fetchall() for record in records: print(record)
- 插入记录
cursor.execute("INSERT INTO your_table (column1, column2) VALUES (%s, %s)", ('value1', 'value2')) connection.commit() # Seal the transaction
- 更新记录
cursor.execute("UPDATE your_table SET column1 = %s WHERE column2 = %s", ('new_value', 'condition_value')) connection.commit()
- 删除记录
cursor.execute("DELETE FROM your_table WHERE condition_column = %s", ('condition_value',)) connection.commit()
- 创建表
cursor.execute(""" CREATE TABLE your_new_table ( id SERIAL PRIMARY KEY, column1 VARCHAR(255), column2 INTEGER ) """) connection.commit()
- 删除表
cursor.execute("DROP TABLE if exists your_table") connection.commit()
- 使用事务 (Transactions)
try: cursor.execute("your first transactional query") cursor.execute("your second transactional query") connection.commit() # Commit if all is well except Exception as e: connection.rollback() # Rollback in case of any issue print(f"An error occurred: {e}") finally: # Don't forget to close cursor and connection in a real application if cursor: cursor.close() if connection: connection.close()
异步 IO (Asynchronous IO / Async Programming)
Python 的 asyncio 库使得编写并发代码变得高效,尤其适用于 I/O 密集型任务。
- 定义异步函数
import asyncio async def fetch_data(): print("Fetching data...") await asyncio.sleep(2) # Simulate an I/O operation print("Data retrieved.")
- 运行异步函数
async def main(): await fetch_data() asyncio.run(main())
- 等待多个协程 (Coroutines)
async def main(): task1 = fetch_data() task2 = fetch_data() await asyncio.gather(task1, task2) asyncio.run(main())
- 创建任务 (Tasks)
async def main(): task1 = asyncio.create_task(fetch_data()) task2 = asyncio.create_task(fetch_data()) await task1 await task2 asyncio.run(main())
- 异步迭代 (Async Iteration)
async def fetch_item(item): await asyncio.sleep(0.5) # Simulate an I/O operation print(f"Fetched {item}") async def main(): items = ['potion', 'scroll', 'wand'] for item in items: await fetch_item(item) # Note: This is sequential, not concurrent for loop asyncio.run(main())
- 使用异步上下文管理器 (Async Context Managers)
# 异步上下文管理器需要实现 __aenter__ 和 __aexit__ 方法 class AsyncResource: async def __aenter__(self): print("Entering context") await asyncio.sleep(0.1) return self async def __aexit__(self, exc_type, exc_val, exc_tb): print("Exiting context") await asyncio.sleep(0.1) async def main(): async with AsyncResource() as ar: print("Within context") asyncio.run(main())
- 异步代码中的异常处理
async def risky_spell(): await asyncio.sleep(1) raise ValueError("The spell backfired!") async def main(): try: await risky_spell() except ValueError as e: print(f"Caught an error: {e}") asyncio.run(main())
- 异步生成器 (Async Generators)
async def fetch_items(): items = ['crystal', 'amulet', 'dagger'] for item in items: await asyncio.sleep(0.5) yield item async def main(): async for item in fetch_items(): print(f"Found {item}") asyncio.run(main())
- 使用信号量 (Semaphores)
async def guarded_spell(semaphore, item): async with semaphore: print(f"Processing {item}") await asyncio.sleep(1) async def main(): semaphore = asyncio.Semaphore(2) # Allow 2 concurrent tasks await asyncio.gather(*(guarded_spell(semaphore, i) for i in range(5))) asyncio.run(main())
- 事件循环 (Event Loop)
async def perform_spell():
    """Simulate a one-second asynchronous operation, printing before and after."""
    print("Casting spell...")
    await asyncio.sleep(1)
    print("Spell cast.")


# Create the loop explicitly. asyncio.get_event_loop() is deprecated when no
# loop is running, and it fails once an earlier asyncio.run() call has
# cleared the current loop. Prefer asyncio.run() unless manual loop control
# is really needed.
loop = asyncio.new_event_loop()
try:
    loop.run_until_complete(perform_spell())
finally:
    # Always release the loop's resources, even if the coroutine raised.
    loop.close()
网络、套接字和网络接口 (Networking, Sockets, and Network Interfaces)
Python 的 socket 模块提供了低级的网络编程能力。
- 创建套接字 (Socket)
import socket s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- 连接到远程服务器
# s.connect(('example.com', 80)) # Connect to example.com on port 80 # print("Connected to example.com:80") # For demonstration, avoid actual connection without proper handling
- 发送数据
# s.sendall(b'Hello, server')
- 接收数据
# data = s.recv(1024) # Receive up to 1024 bytes # print('Received', repr(data))
- 关闭套接字
# s.close()
- 创建监听套接字 (Listening Socket)
serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) serversocket.bind(('localhost', 8080)) # Bind to localhost on port 8080 serversocket.listen(1) # Listen for up to 1 incoming connection print("Server listening on port 8080...")
- 接受连接
# clientsocket, address = serversocket.accept() # print(f"Connection from {address} has been established.") # clientsocket.close() # serversocket.close()
- 非阻塞套接字操作
# s.setblocking(False)
- 使用 UDP 套接字
udp_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) udp_socket.bind(('localhost', 8081)) # Bind UDP socket to localhost on port 8081 print("UDP socket bound to port 8081")
- 枚举网络接口
import socket import netifaces # 注意:netifaces 库需要安装 (pip install netifaces) try: for interface in netifaces.interfaces(): addr_info = netifaces.ifaddresses(interface).get(netifaces.AF_INET) if addr_info: print(f"Interface: {interface}, Address: {addr_info[0]['addr']}") except Exception as e: print(f"Could not enumerate network interfaces (netifaces might not be installed or permissions missing): {e}")
Pandas 库 (DataFrames)
Pandas 是 Python 中进行数据分析和处理的明星库,DataFrame 是其核心数据结构。
- 创建 DataFrame
import pandas as pd data = { 'Element': ['Earth', 'Water', 'Fire', 'Air'], 'Symbol': ['🜃', '🜄', '🜂', '🜁'] } df = pd.DataFrame(data) print(df)
- 从 CSV 文件读取数据
# df = pd.read_csv('elements.csv') # Uncomment and provide a valid path
- 检查前几行
print(df.head(2))
- 选择列
symbols = df['Symbol'] print(symbols)
- 筛选行
fire_elements = df[df['Element'] == 'Fire'] print(fire_elements)
- 创建新列
df['Length'] = df['Element'].apply(len) print(df)
- 分组和聚合数据 (Groupby and Aggregate)
element_groups = df.groupby('Element').agg({'Length': 'mean'}) print(element_groups)
- 合并 DataFrames
df2 = pd.DataFrame({'Element': ['Earth', 'Fire'], 'Quality': ['Solid', 'Plasma']}) merged_df = pd.merge(df, df2, on='Element', how='left') print(merged_df)
- 处理缺失数据
# Example with NaN df_missing = pd.DataFrame({'A': [1, 2, None], 'B': [4, None, 6]}) print("Original:\n", df_missing) df_missing.fillna(value='Unknown', inplace=True) print("After fillna:\n", df_missing)
- 透视和重塑数据 (Pivot and Reshape)
# Creating some sample data for pivot data_pivot = {'City': ['A', 'A', 'B', 'B'], 'Year': [2020, 2021, 2020, 2021], 'Population': [100, 110, 200, 220]} df_pivot = pd.DataFrame(data_pivot) pivoted_df = df_pivot.pivot(index='City', columns='Year', values='Population') print(pivoted_df)
NumPy 库 (Arrays)
NumPy 是 Python 科学计算的核心库,提供了高性能的多维数组对象和工具。
- 创建 NumPy 数组
import numpy as np array = np.array([1, 2, 3, 4, 5]) print(array)
- 全零或全一数组
zeros = np.zeros((3, 3)) # A 3x3 array of zeros ones = np.ones((2, 4)) # A 2x4 array of ones print("Zeros:\n", zeros) print("Ones:\n", ones)
- 创建数字范围
range_array = np.arange(10, 50, 5) # From 10 to 50, step by 5 print(range_array)
- 创建线性间隔数组
linear_spaced = np.linspace(0, 1, 5) # 5 values from 0 to 1 print(linear_spaced)
- 重塑数组 (Reshape Array)
reshaped = np.arange(9).reshape(3, 3) # Reshape a 1D array into a 3x3 2D array print(reshaped)
- 基本数组操作 (Element-wise Operations)
a = np.array([1, 2, 3]) b = np.array([4, 5, 6]) sum_arr = a + b # Element-wise addition difference = b - a # Element-wise subtraction product = a * b # Element-wise multiplication print(f"Sum: {sum_arr}, Difference: {difference}, Product: {product}")
- 矩阵乘法 (Matrix Multiplication)
# For 1D arrays, np.dot acts as dot product # For 2D arrays, it's matrix multiplication A_matrix = np.array([[1, 2], [3, 4]]) B_matrix = np.array([[5, 6], [7, 8]]) result_matrix = np.dot(A_matrix, B_matrix) # Equivalent to A_matrix @ B_matrix in Python 3.5+ print("Matrix product:\n", result_matrix)
- 访问数组元素 (Array Indexing)
element = a[2] # Retrieve the third element of array 'a' row = reshaped[1, :] # Retrieve the second row of 'reshaped' print(f"Element: {element}, Row: {row}")
- 布尔索引 (Boolean Indexing)
filtered = a[a > 2] # Elements of 'a' greater than 2 print(filtered)
- 聚合和统计 (Aggregation and Statistics)
mean_val = np.mean(a) maximum_val = np.max(a) sum_val = np.sum(a) print(f"Mean: {mean_val}, Max: {maximum_val}, Sum: {sum_val}")
Matplotlib 库 (数据可视化)
Matplotlib 是一个用于创建静态、交互式和动画可视化的 Python 库。
- 创建基本绘图 (Basic Plot)
import matplotlib.pyplot as plt x = [1, 2, 3, 4, 5] y = [1, 4, 9, 16, 25] plt.plot(x, y) plt.show()
- 添加标题和标签
plt.plot(x, y) plt.title('Growth Over Time') plt.xlabel('Time') plt.ylabel('Growth') plt.show()
- 创建散点图 (Scatter Plot)
plt.scatter(x, y) plt.show()
- 自定义线型和标记 (Line Styles and Markers)
plt.plot(x, y, linestyle='--', marker='o', color='b') plt.show()
- 在同一坐标轴上创建多个图
z = [2, 3, 4, 5, 6] plt.plot(x, y, label='y values') plt.plot(x, z, label='z values') plt.legend() plt.show()
- 创建子图 (Subplots)
fig, ax = plt.subplots(2, 1) # 2 rows, 1 column ax[0].plot(x, y) ax[0].set_title('Subplot 1') ax[1].plot(x, z) ax[1].set_title('Subplot 2') plt.tight_layout() # Adjust subplot params for a tight layout plt.show()
- 创建直方图 (Histogram)
data = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4] plt.hist(data, bins=4, edgecolor='black') plt.title('Histogram Example') plt.show()
- 添加图例 (Legend)
plt.plot(x, y, label='Growth') plt.plot(x, z, label='Decay') plt.legend() plt.show()
- 自定义刻度 (Ticks)
plt.plot(x, y) plt.xticks([1, 2, 3, 4, 5], ['One', 'Two', 'Three', 'Four', 'Five']) plt.yticks([0, 5, 10, 15, 20, 25], ['0', '5', '10', '15', '20', '25+']) plt.show()
- 保存图形 (Save Figure)
plt.plot(x, y) plt.savefig('growth_over_time.png') plt.close() # Close the plot to free memory
Scikit-Learn 库 (机器学习 - Machine Learning)
Scikit-learn 是一个广泛使用的 Python 机器学习库,提供了各种监督和无监督学习算法。
- 加载数据集 (Load Dataset)
from sklearn import datasets iris = datasets.load_iris() X, y = iris.data, iris.target print(f"Dataset shape: X={X.shape}, y={y.shape}")
- 将数据拆分为训练集和测试集 (Train/Test Split)
from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) print(f"Train shapes: X_train={X_train.shape}, y_train={y_train.shape}") print(f"Test shapes: X_test={X_test.shape}, y_test={y_test.shape}")
- 训练模型 (Train Model)
from sklearn.ensemble import RandomForestClassifier model = RandomForestClassifier(random_state=42) model.fit(X_train, y_train) print("Model trained.")
- 进行预测 (Make Predictions)
predictions = model.predict(X_test) print("Predictions made.")
- 评估模型性能 (Evaluate Model Performance)
from sklearn.metrics import accuracy_score accuracy = accuracy_score(y_test, predictions) print(f"Model accuracy: {accuracy:.2f}")
- 使用交叉验证 (Cross-Validation)
from sklearn.model_selection import cross_val_score scores = cross_val_score(model, X, y, cv=5) print(f"Cross-validation scores: {scores}") print(f"Mean CV accuracy: {scores.mean():.2f}")
- 特征缩放 (Feature Scaling)
from sklearn.preprocessing import StandardScaler scaler = StandardScaler() X_train_scaled = scaler.fit_transform(X_train) X_test_scaled = scaler.transform(X_test) print("Features scaled.")
- 使用网格搜索进行参数调优 (Grid Search for Hyperparameter Tuning)
from sklearn.model_selection import GridSearchCV param_grid = {'n_estimators': [10, 50, 100], 'max_depth': [None, 5, 10]} grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=3) grid_search.fit(X_train, y_train) print(f"Best parameters: {grid_search.best_params_}") print(f"Best score: {grid_search.best_score_:.2f}")
- 创建管道 (Pipeline)
from sklearn.pipeline import Pipeline pipeline = Pipeline([ ('scaler', StandardScaler()), ('classifier', RandomForestClassifier(random_state=42)) ]) pipeline.fit(X_train, y_train) pipe_predictions = pipeline.predict(X_test) pipe_accuracy = accuracy_score(y_test, pipe_predictions) print(f"Pipeline accuracy: {pipe_accuracy:.2f}")
- 保存和加载模型 (Save and Load Model)
import joblib # Saving the model joblib.dump(model, 'random_forest_model.joblib') print("Model saved.") # Loading the model loaded_model = joblib.load('random_forest_model.joblib') print("Model loaded.")
Plotly 库 (交互式数据可视化 - Interactive Data Visualization)
Plotly 是一个用于创建交互式、发布质量图表的 Python 库。
- 创建基本折线图 (Basic Line Plot)
import plotly.graph_objs as go import plotly.io as pio x = [1, 2, 3, 4, 5] y = [1, 4, 9, 16, 25] fig = go.Figure(data=go.Scatter(x=x, y=y, mode='lines')) pio.show(fig)
- 创建散点图 (Scatter Plot)
fig = go.Figure(data=go.Scatter(x=x, y=y, mode='markers')) pio.show(fig)
- 创建条形图 (Bar Chart)
categories = ['A', 'B', 'C', 'D', 'E'] values = [10, 20, 15, 30, 25] fig = go.Figure(data=go.Bar(x=categories, y=values)) pio.show(fig)
- 创建饼图 (Pie Chart)
labels = ['Earth', 'Water', 'Fire', 'Air'] sizes = [25, 35, 20, 20] fig = go.Figure(data=go.Pie(labels=labels, values=sizes)) pio.show(fig)
- 创建直方图 (Histogram)
data = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4] fig = go.Figure(data=go.Histogram(x=data)) pio.show(fig)
- 创建箱线图 (Box Plot)
data = [1, 2, 2, 3, 4, 4, 4, 5, 5, 6] fig = go.Figure(data=go.Box(y=data)) pio.show(fig)
- 创建热力图 (Heatmap)
import numpy as np z = np.random.rand(10, 10) # Generate random data fig = go.Figure(data=go.Heatmap(z=z)) pio.show(fig)
- 创建 3D 曲面图 (3D Surface Plot)
z = np.random.rand(20, 20) # Generate random data fig = go.Figure(data=go.Surface(z=z)) pio.show(fig)
- 创建子图 (Subplots)
from plotly.subplots import make_subplots fig = make_subplots(rows=1, cols=2, subplot_titles=('Line Plot', 'Bar Chart')) fig.add_trace(go.Scatter(x=x, y=y, mode='lines'), row=1, col=1) fig.add_trace(go.Bar(x=categories, y=values), row=1, col=2) pio.show(fig)
- 创建交互式时间序列 (Interactive Time Series)
import pandas as pd dates = pd.date_range('20230101', periods=5) values = [10, 11, 12, 13, 14] fig = go.Figure(data=go.Scatter(x=dates, y=values, mode='lines+markers')) pio.show(fig)
日期和时间操作 (Date and Time Operations)
datetime 模块提供了处理日期和时间的基本类。
- 获取当前日期和时间
from datetime import datetime now = datetime.now() print(f"Current date and time: {now}")
- 创建特定日期和时间
specific_time = datetime(2023, 1, 1, 12, 30, 0) print(f"Specific date and time: {specific_time}")
- 格式化日期和时间
formatted = now.strftime("%Y-%m-%d %H:%M:%S") print(f"Formatted date and time: {formatted}")
- 从字符串解析日期和时间
date_string = "2023-01-01 15:00:00" parsed_date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S") print(f"Parsed date and time: {parsed_date}")
- 处理时间差 (Timedelta)
from datetime import timedelta delta = timedelta(days=7, hours=3) future_date = now + delta print(f"Date after 7 days and 3 hours: {future_date}")
- 比较日期和时间
if specific_time > now: print("Specific time is in the future.") else: print("Specific time has passed.")
- 从日期/时间中提取组件
year = now.year month = now.month day = now.day hour = now.hour minute = now.minute second = now.second print(f"Year: {year}, Month: {month}, Day: {day}, Hour: {hour}, Minute: {minute}, Second: {second}")
- 处理时区 (Timezones)
from datetime import timezone, timedelta utc_time = datetime.now(timezone.utc) print(f"Current UTC time: {utc_time}") # Adjusting to a specific timezone (e.g., EST, which is UTC-5) est_offset = timedelta(hours=-5) est_timezone = timezone(est_offset) est_time = utc_time.astimezone(est_timezone) print(f"Current EST time: {est_time}")
- 获取星期几
weekday = now.strftime("%A") # Full weekday name print(f"Today is: {weekday}")
- 处理 Unix 时间戳 (Unix Timestamp)
timestamp = datetime.timestamp(now) print(f"Current timestamp: {timestamp}") # Converting a timestamp back to a datetime date_from_timestamp = datetime.fromtimestamp(timestamp) print(f"Date from timestamp: {date_from_timestamp}")
高级列表推导和 Lambda 函数 (Advanced List Comprehensions and Lambda Functions)
列表推导式和 Lambda 函数是 Python 中简洁且强大的功能,常用于数据转换。
- 嵌套列表推导式
matrix = [[j for j in range(5)] for i in range(3)] print(matrix) # Creates a 3x5 matrix
- 条件列表推导式
filtered = [x for x in range(10) if x % 2 == 0] print(filtered) # Even numbers from 0 to 9
- 多可迭代对象的列表推导式
pairs = [(x, y) for x in [1, 2, 3] for y in [3, 1, 4] if x != y] print(pairs) # Pairs of non-equal elements
- 使用 Lambda 函数
square = lambda x: x**2 print(square(5)) # Returns 25
- 列表推导式中的 Lambda 函数
# 尽管直接使用函数更常见,但Lambda也可以这样用 squared = [x**2 for x in range(5)] # 更 Pythonic 的方式 print(squared) # Squares of numbers from 0 to 4
- 用于展平列表的列表推导式 (Flattening Lists)
nested = [[1, 2, 3], [4, 5], [6, 7]] flattened = [x for sublist in nested for x in sublist] print(flattened)
- 将函数应用于元素
import math transformed = [math.sqrt(x) for x in range(1, 6)] print(transformed) # Square roots of numbers from 1 to 5
- 将 Lambda 与 Map 和 Filter 结合使用
mapped = list(map(lambda x: x**2, range(5))) filtered = list(filter(lambda x: x > 5, mapped)) print(f"Mapped: {mapped}") # Squares of numbers from 0 to 4 print(f"Filtered: {filtered}") # Elements greater than 5
- 带条件表达式的列表推导式
conditional = [x if x > 2 else x**2 for x in range(5)] print(conditional) # Squares numbers less than or equal to 2, passes others unchanged
- 使用 Lambda 进行复杂转换
complex_transformation = list(map(lambda x: x**2 if x % 2 == 0 else x + 5, range(5))) print(complex_transformation) # Applies different transformations based on even-odd condition
面向对象编程 (OOP)
Python 是一种多范式语言,OOP 是其重要组成部分。
- 定义类 (Class)
class Wizard: def __init__(self, name, power): self.name = name self.power = power def cast_spell(self): print(f"{self.name} casts a spell with power {self.power}!")
- 创建实例 (Instance)
merlin = Wizard("Merlin", 100)
- 调用方法 (Method)
merlin.cast_spell()
- 继承 (Inheritance)
class ArchWizard(Wizard): def __init__(self, name, power, realm): super().__init__(name, power) # Call parent constructor self.realm = realm def summon_familiar(self): print(f"{self.name} summons a familiar from the {self.realm} realm.") gandalf = ArchWizard("Gandalf", 120, "Middle-earth") gandalf.cast_spell() gandalf.summon_familiar()
- 重写方法 (Method Overriding)
class Sorcerer(Wizard): def cast_spell(self): print(f"{self.name} casts a powerful dark spell!") voldemort = Sorcerer("Voldemort", 90) voldemort.cast_spell()
- 多态 (Polymorphism)
def unleash_magic(wizard_obj): wizard_obj.cast_spell() # Calls the appropriate cast_spell method unleash_magic(merlin) unleash_magic(voldemort)
- 封装 (Encapsulation)
class Alchemist: def __init__(self, secret_ingredient): # 使用双下划线使属性成为私有 (名称混淆) self.__secret = secret_ingredient def reveal_secret(self): print(f"The secret ingredient is {self.__secret}") alchemist = Alchemist("Philosopher's Stone") alchemist.reveal_secret() # print(alchemist.__secret) # This would raise an AttributeError print(alchemist._Alchemist__secret) # Access via name mangling (not recommended)
- 组合 (Composition)
class Spellbook: def __init__(self, spells): self.spells = spells def list_spells(self): print(f"Spells in book: {', '.join(self.spells)}") class Mage: def __init__(self, name, spellbook): self.name = name self.spellbook = spellbook # Composition: Mage has a Spellbook my_spellbook = Spellbook(["Fireball", "Teleportation"]) my_mage = Mage("Elara", my_spellbook) my_mage.spellbook.list_spells()
- 类方法和静态方法 (Class Methods and Static Methods)
class Enchanter: # 静态方法不访问实例或类状态 @staticmethod def enchant(item): print(f"{item} is enchanted!") # 类方法接收类作为第一个参数 (cls) @classmethod def summon(cls): print(f"A new {cls.__name__} is summoned.") Enchanter.enchant("Sword") Enchanter.summon()
- 属性和设置器 (Properties and Setters)
class Elementalist: def __init__(self, element): self._element = element # Protected member convention @property # Getter method def element(self): return self._element @element.setter # Setter method def element(self, value): valid_elements = ["Fire", "Water", "Earth", "Air"] if value in valid_elements: self._element = value else: print(f"Invalid element '{value}'! Must be one of {valid_elements}") avatar = Elementalist("Water") print(f"Current element: {avatar.element}") avatar.element = "Fire" print(f"New element: {avatar.element}") avatar.element = "Spirit" # Invalid
装饰器 (Decorators)
装饰器是一种在不修改函数或类定义的情况下,动态地添加或修改其行为的强大方式。
- 基本装饰器
def my_decorator(func): def wrapper(): print("Something is happening before the function is called.") func() print("Something is happening after the function is called.") return wrapper @my_decorator def say_hello(): print("Hello!") say_hello()
- 带参数的装饰器
def my_decorator(func): def wrapper(*args, **kwargs): print("Before call") result = func(*args, **kwargs) print("After call") return result return wrapper @my_decorator def greet(name): print(f"Hello {name}") greet("Alice")
- 使用 functools.wraps
from functools import wraps def my_decorator(func): @wraps(func) # Preserves original function's metadata def wrapper(*args, **kwargs): """Wrapper function""" return func(*args, **kwargs) return wrapper @my_decorator def greet_wrapped(name): """Greet someone""" print(f"Hello {name}") print(f"Function name: {greet_wrapped.__name__}") # Outputs: 'greet_wrapped' print(f"Function doc: {greet_wrapped.__doc__}") # Outputs: 'Greet someone'
- 类装饰器 (Class Decorators)
class MyDecorator:
    """Class-based decorator that prints a message before and after each call.

    The instance stores the wrapped callable and forwards every call to it,
    returning whatever the wrapped callable returns.
    """

    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        print("Before call from class decorator")
        # Keep the result so decorated functions that return a value still
        # work; without this every decorated call would evaluate to None.
        result = self.func(*args, **kwargs)
        print("After call from class decorator")
        return result


@MyDecorator
def greet_class_decorated(name):
    print(f"Hello {name}")


greet_class_decorated("Bob")
- 带参数的装饰器 (工厂函数 - Decorators with Arguments)
def repeat(times):
    """Return a decorator that calls the wrapped function `times` times.

    The wrapper returns the result of the last call, or None when `times`
    is zero or negative and the function is never called.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            result = None
            for _ in range(times):
                # Remember the latest result so the return value is not lost.
                result = func(*args, **kwargs)
            return result
        return wrapper
    return decorator


@repeat(3)
def say_hello_repeatedly():
    print("Hello")


say_hello_repeatedly()
- 方法装饰器 (Method Decorators)
def method_decorator(func):
    """Wrap an instance method so a notice is printed before each call."""

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        # self is named explicitly so the wrapper reads like a method;
        # it is handed straight on to the original function.
        print("Method Decorator applied to instance method")
        outcome = func(self, *args, **kwargs)
        return outcome

    return wrapper


class MyClass:
    """Demo class with one decorated instance method."""

    @method_decorator
    def greet_instance(self, name):
        print(f"Hello {name}")


obj = MyClass()
obj.greet_instance("Alice")
- 堆叠装饰器 (Stacking Decorators)
# 装饰器从下往上执行 @my_decorator @repeat(2) def greet_stacked(name): print(f"Hello {name}") greet_stacked("Charlie")
- 带可选参数的装饰器
from functools import wraps  # Imported here so the snippet runs on its own


def smart_decorator(arg=None):
    """Decorator usable both bare (``@smart_decorator``) and with an argument.

    Args:
        arg: Either the function being decorated (bare usage) or an optional
            value that is printed before each call of the decorated function.

    Returns:
        The wrapped function in bare usage, otherwise a decorator.
    """

    def make_wrapper(func, label):
        # Build the wrapper with an explicit label so bare usage never
        # mistakes the decorated function itself for a decorator argument.
        @wraps(func)
        def wrapper(*args, **kwargs):
            if label:
                print(f"Argument from smart decorator: {label}")
            return func(*args, **kwargs)

        return wrapper

    if callable(arg):
        # Bare usage: arg IS the decorated function, so there is no argument to report.
        return make_wrapper(arg, None)

    def decorator(func):
        return make_wrapper(func, arg)

    return decorator


@smart_decorator
def no_args_decorated():
    print("Function decorated without explicit args.")


@smart_decorator("With explicit args")
def with_args_decorated():
    print("Function decorated with explicit args.")


no_args_decorated()
with_args_decorated()
- 类方法装饰器 (Class Method Decorators)
class MyClass:
    # @classmethod is listed first, so it is applied last: my_decorator wraps the
    # plain function and classmethod then wraps that result.
    # NOTE(review): relies on a my_decorator whose wrapper forwards positional
    # arguments (cls is passed through it) - confirm which definition is in scope.
    @classmethod
    @my_decorator  # Can decorate class methods too
    def class_method_decorated(cls):
        print(f"Class method called from {cls.__name__}")

MyClass.class_method_decorated()
- 静态方法装饰器 (Static Method Decorators)
class MyClass:
    # @staticmethod is listed first, so it is applied last: my_decorator wraps the
    # plain function and staticmethod then wraps that result.
    @staticmethod
    @my_decorator  # Can decorate static methods
    def static_method_decorated():
        print("Static method called")

MyClass.static_method_decorated()
GraphQL
GraphQL 是一种用于 API 的查询语言,本节使用 `gql` 库进行操作。
- 设置 GraphQL 客户端
from gql import gql, Client from gql.transport.requests import RequestsHTTPTransport # Replace with your actual GraphQL endpoint # transport = RequestsHTTPTransport(url='https://your-graphql-endpoint.com/graphql') # client = Client(transport=transport, fetch_schema_from_transport=True) # print("GraphQL client setup (placeholder).")
- 执行简单查询 (Simple Query)
# query = gql(''' # { # allWizards { # id # name # power # } # } # ''') # # result = client.execute(query) # print(result) # print("Simple query example (placeholder).")
- 执行带变量的查询 (Query with Variables)
# query = gql(''' # query GetWizards($element: String!) { # wizards(element: $element) { # id # name # } # } # ''') # params = {"element": "Fire"} # result = client.execute(query, variable_values=params) # print(result) # print("Query with variables example (placeholder).")
- 执行 Mutation
# mutation = gql(''' # mutation CreateWizard($name: String!, $element: String!) { # createWizard(name: $name, element: $element) { # wizard { # id # name # } # } # } # ''') # params = {"name": "Gandalf", "element": "Light"} # result = client.execute(mutation, variable_values=params) # print(result) # print("Mutation example (placeholder).")
- 处理错误
# from gql import gql, Client # from gql.transport.exceptions import TransportQueryError # try: # result = client.execute(query) # except TransportQueryError as e: # print(f"GraphQL Query Error: {e}") # print("Error handling example (placeholder).")
- 订阅 (Subscriptions)
# subscription = gql(''' # subscription { # wizardUpdated { # id # name # power # } # } # ''') # # Subscriptions typically run in an async context or separate thread # # for result in client.subscribe(subscription): # # print(result) # print("Subscription example (placeholder).")
- 片段 (Fragments)
# query = gql(''' # fragment WizardDetails on Wizard { # name # power # } # query { # allWizards { # ...WizardDetails # } # } # ''') # result = client.execute(query) # print(result) # print("Fragments example (placeholder).")
- 内联片段 (Inline Fragments)
# query = gql(''' # { # search(text: "magic") { # __typename # ... on Wizard { # name # power # } # ... on Spell { # name # effect # } # } # } # ''') # result = client.execute(query) # print(result) # print("Inline fragments example (placeholder).")
- 使用指令 (Directives)
# query = gql(''' # query GetWizards($withPower: Boolean!) { # allWizards { # name # power @include(if: $withPower) # } # } # ''') # params = {"withPower": True} # result = client.execute(query, variable_values=params) # print(result) # print("Directives example (placeholder).")
- 批量请求 (Batching Requests)
# from gql import gql, Client # from gql.transport.requests import RequestsHTTPTransport # # transport = RequestsHTTPTransport(url='https://your-graphql-endpoint.com/graphql', use_json=True) # client = Client(transport=transport, fetch_schema_from_transport=True) # # query1 = gql('query { wizard(id: "1") { name } }') # query2 = gql('query { allSpells { name } }') # # # results = client.execute([query1, query2]) # This would send them as a batch # # print(results) # print("Batching requests example (placeholder).")
(请注意:上述 GraphQL 代码示例仅为结构展示,需替换为实际可用的 GraphQL 客户端和 endpoint 才能运行。)
正则表达式 (Regular Expressions)
Python 的 `re` 模块提供了强大的正则表达式操作功能,用于模式匹配和字符串处理。
- 基本模式匹配
import re

text = "Search this string for patterns."

# re.search scans the whole string and yields None when nothing matches.
match = re.search(r"patterns", text)
if match is not None:
    print("Pattern found!")
- 编译正则表达式 (Compile Regex)
pattern = re.compile(r"patterns") match = pattern.search(text) if match: print("Pattern found using compiled regex!")
- 匹配开头或结尾
if re.match(r"^Search", text): print("Starts with 'Search'") if re.search(r"patterns\.$", text): # Escape the dot as it's a special character print("Ends with 'patterns.'")
- 查找所有匹配项 (`findall`)
all_matches = re.findall(r"t\w+", text)  # Finds every 't' followed by word characters, even mid-word (use r"\bt\w+" to match only words starting with 't')
print(all_matches)
- 查找和替换 (
sub
)replaced_text = re.sub(r"string", "sentence", text) print(replaced_text)
- 拆分字符串 (
split
)words = re.split(r"\s+", text) # Split on one or more spaces print(words)
- 转义特殊字符 (Escaping Special Characters)
# \b 是一个词边界 (word boundary) escaped_match = re.search(r"\bfor\b", text) if escaped_match: print(f"Found word 'for': {escaped_match.group()}")
- 分组和捕获 (Grouping and Capturing)
# Two parenthesised groups capture the two words on either side of the space.
match = re.search(r"(\w+) (\w+)", "Hello World")
if match is not None:
    # group(0) is the whole match; group(1) and group(2) are the captures in order.
    print(f"Full match: {match.group(0)}")
    print(f"First group: {match.group(1)}")
    print(f"Second group: {match.group(2)}")
- 非捕获组 (Non-Capturing Groups)
match = re.search(r"(?:\w+) (\w+)", "Hello World") if match: print(f"Only captured: {match.group(1)}") # The first (and only) group
- 先行断言和后行断言 (Lookahead and Lookbehind)
text_extended = "pre string suffix"

# Lookahead: a word that is immediately followed by ' string' (which is not consumed).
lookahead = re.search(r"\w+(?= string)", text_extended)
# Lookbehind: a word that is immediately preceded by 'pre ' (which is not consumed).
lookbehind = re.search(r"(?<=pre )\w+", text_extended)

if lookahead is not None:
    print(f"Lookahead match: {lookahead.group()}")
if lookbehind is not None:
    print(f"Lookbehind match: {lookbehind.group()}")
- 修改模式匹配行为的标志 (Flags)
case_insensitive = re.findall(r"search", text, re.IGNORECASE) print(f"Case-insensitive match: {case_insensitive}")
- 使用命名组 (Named Groups)
# (?P<name>...) gives a capture group a name that can be used instead of its index.
match = re.search(r"(?P<first>\w+) (?P<second>\w+)", "Alpha Beta")
if match is not None:
    print(f"Named group 'first': {match.group('first')}")
    print(f"Named group 'second': {match.group('second')}")
- 多行匹配 (Multiline Matching)
multi_line_text = "Start\nmiddle end"

# With re.MULTILINE, ^ anchors at the start of every line, not only at the
# start of the whole string.
line_start_word = re.compile(r"^m\w+", re.MULTILINE)
matches = line_start_word.findall(multi_line_text)
print(f"Multiline matches: {matches}")
- 惰性量词 (Lazy Quantifiers)
html = "<body><h1>Title</h1></body>"

# .* is greedy by default and would run to the last '>';
# .*? is lazy and stops at the first '>'.
first_tag = re.compile(r"<.*?>")
match = first_tag.search(html)
if match is not None:
    print(f"Lazy quantifier match: {match.group()}")  # Matches '<body>'
- 详细正则表达式 (Verbose Regex)
pattern = re.compile(r""" \b # 词边界 \w+ # 一个或多个单词字符 \s # 空格 (?:string)? # 可选的非捕获组 'string' \.? # 可选的点 """, re.VERBOSE) # re.VERBOSE 允许在模式中添加空白和注释 match = pattern.search("Search this string for patterns.") if match: print(f"Verbose regex match: {match.group()}")
字符串操作 (String Operations)
Python 字符串是不可变的序列,提供了丰富的内置方法。
- 连接字符串
greeting = "Hello" name = "Alice" message = greeting + ", " + name + "!" print(message)
- 使用
str.format
格式化字符串message = "{}, {}. Welcome!".format(greeting, name) print(message)
- 格式化字符串字面量 (f-strings)
message = f"{greeting}, {name}. Welcome!" print(message)
- 字符串方法 - 大小写转换
s = "Python" print(f"Uppercase: {s.upper()}") # Uppercase print(f"Lowercase: {s.lower()}") # Lowercase print(f"Title Case: {s.title()}") # Title Case
- 字符串方法 -
strip
,rstrip
,lstrip
s = " trim me " print(f"Stripped: '{s.strip()}'") # Both ends print(f"Rstripped: '{s.rstrip()}'") # Right end print(f"Lstripped: '{s.lstrip()}'") # Left end
- 字符串方法 -
startswith
,endswith
s = "filename.txt" print(f"Starts with 'file': {s.startswith('file')}") # True print(f"Ends with '.txt': {s.endswith('.txt')}") # True
- 字符串方法 -
split
,join
s = "split,this,string"

# split() turns the comma-separated text into a list of pieces ...
words = s.split(",")
# ... and join() glues a list back together with the chosen separator.
joined = " ".join(words)

print(f"Split: {words}")
print(f"Joined: '{joined}'")
- 字符串方法 -
replace
s = "Hello world" new_s = s.replace("world", "Python") print(f"Replaced: {new_s}")
- 字符串方法 -
find
,index
s = "look for a substring"

# find() reports a missing substring with -1 ...
position = s.find("substring")
print(f"Find 'substring': {position}")

# ... whereas index() raises ValueError instead.
try:
    index = s.index("substring")
except ValueError as e:
    print(f"Index error: {e}")
else:
    print(f"Index of 'substring': {index}")
- 字符串方法 - 字符操作 (Iteration)
s = "characters" print("Characters:") for char in s: print(char, end=' ') print()
- 字符串方法 -
isdigit
,isalpha
,isalnum
print(f"'123' is digit: {'123'.isdigit()}") # True print(f"'abc' is alpha: {'abc'.isalpha()}") # True print(f"'abc123' is alphanumeric: {'abc123'.isalnum()}")# True
- 字符串切片 (String Slicing)
s = "slice me" sub = s[2:7] # From 3rd to 7th character (index 2 to 6) print(f"Sliced: '{sub}'")
- 使用
len
获取字符串长度s = "length" print(f"Length of '{s}': {len(s)}") # 6
- 多行字符串 (Multiline Strings)
multi = """Line one Line two Line three""" print("Multiline string:") print(multi)
- 原始字符串 (Raw Strings)
# 原始字符串忽略反斜杠的转义功能 path = r"C:\User\name\folder\new" print(f"Raw string path: {path}")
Web 抓取 (Web Scraping)
利用 `requests` 和 `BeautifulSoup` 进行 Web 抓取是 Python 的常见应用。
- 使用
requests
获取网页import requests # url = 'https://example.com' # Use a real URL for actual scraping # try: # response = requests.get(url, timeout=5) # response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx) # html = response.text # print(f"Fetched content from {url[:30]}...") # except requests.exceptions.RequestException as e: # print(f"Error fetching URL: {e}") # html = "<html><body><h1>Example Domain</h1><p>This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.</p><a href='/'>More information...</a></body></html>" # Fallback for example html = "<html><head><title>Example Page</title></head><body><h1 class='main-heading'>Welcome</h1><div class='article'><h2 class='article-title'>Article One</h2><p>Content of article one.</p><a href='/article1'>Read More</a></div><div class='article'><h2 class='article-title'>Article Two</h2><p>Content of article two.</p><a href='/article2'>Read More</a></div></body></html>"
- 使用
BeautifulSoup
解析 HTMLfrom bs4 import BeautifulSoup soup = BeautifulSoup(html, 'html.parser') # print(soup.prettify()) # Pretty-print the HTML print("HTML parsed with BeautifulSoup.")
- 导航 HTML 树 (HTML Tree Navigation)
title = soup.title.text # Get the page title print(f"Page Title: {title}") headings = soup.find_all('h1') # List of all <h1> tags for h in headings: print(f"Heading 1: {h.text}")
- 使用 CSS 选择器
articles = soup.select('div.article') # All elements with class 'article' inside a <div> print(f"Found {len(articles)} articles using CSS selector.")
- 从标签中提取数据
for i, article in enumerate(articles):
    # select_one / find return None when the child is missing, which lets each
    # field fall back to a placeholder instead of raising.
    title_tag = article.select_one('h2.article-title')
    link_tag = article.find('a')

    if title_tag:
        title = title_tag.text
    else:
        title = "No Title"

    if link_tag and 'href' in link_tag.attrs:
        link = link_tag['href']
    else:
        link = "No Link"

    print(f"Article {i+1}: Title='{title}', Link='{link}'")
- 处理相对 URL (Relative URLs)
from urllib.parse import urljoin base_url = 'https://example.com' relative_links = [a['href'] for article in articles for a in article.find_all('a') if 'href' in a.attrs] absolute_urls = [urljoin(base_url, link) for link in relative_links] print(f"Absolute URLs: {absolute_urls}")
- 处理分页 (Pagination)
# base_url_pagination = "https://example.com/page/" # for page in range(1, 3): # For example, 2 pages # page_url = base_url_pagination + str(page) # # response = requests.get(page_url) # # Process each page's content # print(f"Processing page: {page_url}") print("Pagination example (conceptual).")
- 处理 AJAX 请求
# Find the URL of the AJAX request (using browser's developer tools) and fetch it # ajax_url = 'https://api.example.com/ajax_data' # data = requests.get(ajax_url).json() # Assuming the response is JSON # print(f"Fetched AJAX data: {data}") print("AJAX request example (conceptual).")
- Web 抓取中使用正则表达式
import re

# Sample text standing in for a scraped page that contains e-mail addresses.
email_html = "Contact us at support@example.com or sales@example.org. My email is me@example.net."

# The final character class is [A-Za-z]; writing it as [A-Z|a-z] would also accept
# a literal '|' because '|' is not an alternation operator inside [...].
EMAIL_PATTERN = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')

emails = EMAIL_PATTERN.findall(email_html)
print(f"Emails found: {emails}")
- 尊重
robots.txt
from urllib.robotparser import RobotFileParser # rp = RobotFileParser() # rp.set_url('https://example.com/robots.txt') # rp.read() # url_to_check = 'https://example.com/some_page' # can_scrape = rp.can_fetch('*', url_to_check) # print(f"Can scrape {url_to_check}: {can_scrape}") print("Robots.txt example (conceptual, requires network).")
- 使用会话和 Cookie (Sessions and Cookies)
# session = requests.Session() # session.get('https://example.com/login') # Perform login to get cookies # session.cookies.set('key', 'value') # Manually set cookies, if needed # response = session.get('https://example.com/protected_page') # print(f"Response from protected page: {response.status_code}") print("Sessions and Cookies example (conceptual).")
- 使用浏览器自动化 (
selenium
库) 进行抓取# from selenium import webdriver # from selenium.webdriver.chrome.service import Service as ChromeService # from webdriver_manager.chrome import ChromeDriverManager # # # Ensure ChromeDriver is installed and in PATH, or specify path # try: # browser = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install())) # browser.get('https://example.com') # content = browser.page_source # print(f"Selenium fetched page content length: {len(content)}") # # Parse and extract data using BeautifulSoup, etc. # browser.quit() # except Exception as e: # print(f"Selenium example failed (ensure WebDriver is set up): {e}") print("Selenium example (conceptual, requires setup).")
- Web 抓取中的错误处理
# url_bad = 'https://httpbin.org/status/404' # try: # response = requests.get(url_bad, timeout=5) # response.raise_for_status() # Raises an error for bad status codes # except requests.exceptions.RequestException as e: # print(f"Error during web scraping: {e}") print("Error handling in web scraping (conceptual).")
- 异步 Web 抓取
# import aiohttp # import asyncio # # async def fetch_async(url): # async with aiohttp.ClientSession() as session: # async with session.get(url) as response: # return await response.text() # # urls_async = ['https://example.com/page1', 'https://example.com/page2'] # Use real URLs # # loop = asyncio.get_event_loop() # Deprecated in Python 3.10+ # # pages = loop.run_until_complete(asyncio.gather(*(fetch_async(url) for url in urls_async))) # async def main_async_scrape(): # pages = await asyncio.gather(*(fetch_async(url) for url in urls_async)) # print(f"Fetched {len(pages)} pages asynchronously.") # # # asyncio.run(main_async_scrape()) print("Asynchronous web scraping example (conceptual).")
- 数据存储 (CSV, 数据库)
import csv

sample_articles_data = [{'title': 'Article One', 'url': '/article1'}, {'title': 'Article Two', 'url': '/article2'}]

# newline='' lets the csv module control line endings itself.
with open('output.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Title', 'URL'])  # Header row
    # One row per article, in the same column order as the header.
    writer.writerows([entry['title'], entry['url']] for entry in sample_articles_data)

print("Data stored to output.csv.")
使用 pip (包管理 - Package Management)
`pip` 是 Python 的包安装程序,用于安装和管理 Python 软件包。
- 安装包
pip install numpy
- 列出已安装的包
pip list
- 升级包
pip install --upgrade numpy
- 卸载包
pip uninstall numpy
- 搜索包
pip search "data visualization" # Note: PyPI has disabled the search API that `pip search` relies on, so this command fails against pypi.org. Search on https://pypi.org directly instead.
- 安装特定版本的包
pip install numpy==1.18.5
- 生成 requirements 文件
pip freeze > requirements.txt
- 从 requirements 文件安装包
pip install -r requirements.txt
- 使用虚拟环境 (Virtual Environments)
# Create a virtual environment named 'venv' python -m venv venv # Activate the virtual environment # On Windows # .\venv\Scripts\activate # On Unix or MacOS # source venv/bin/activate
- 检查包依赖
pip show numpy
常用内置函数和包 (Common Built-in Functions and Packages)
Python 提供了丰富的内置函数和标准库,涵盖了从操作系统交互到数据处理的各种需求。
os
- 操作系统接口import os current_directory = os.getcwd() # Get the current working directory print(f"Current working directory: {current_directory}")
sys
- 系统特定参数和函数import sys # sys.exit() # Exit the script (uncomment to test) print(f"Python version: {sys.version}")
datetime
- 基本日期和时间类型from datetime import datetime now_dt = datetime.now() # Current date and time print(f"Current datetime: {now_dt}")
math
- 数学函数import math result_math = math.sqrt(16) # Square root print(f"Square root of 16: {result_math}")
random
- 生成伪随机数import random number = random.randint(1, 10) # Random integer between 1 and 10 print(f"Random number (1-10): {number}")
json
- JSON 编码器和解码器import json data_dict = {'name': 'Alice', 'age': 30} json_string = json.dumps(data_dict, indent=2) # Dictionary to JSON string print(f"JSON string:\n{json_string}") loaded_dict = json.loads(json_string) # JSON string to dictionary print(f"Loaded dict: {loaded_dict}")
re
- 正则表达式import re match_re = re.search('Hello', 'Hello, world!') # Search for 'Hello' in the string print(f"Regex match found: {bool(match_re)}")
urllib
- URL 处理模块from urllib.request import urlopen try: # content_url = urlopen('http://example.com').read() # Fetch the content of a webpage # print(f"Content from example.com (first 100 chars):\n{content_url[:100]}") print("urllib.request example (conceptual).") except Exception as e: print(f"Could not open URL (network issue?): {e}")
http
- HTTP 模块# 示例服务器代码 (通常在单独的脚本中运行) # from http.server import HTTPServer, BaseHTTPRequestHandler # # class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): # def do_GET(self): # self.send_response(200) # self.send_header('Content-type', 'text/html') # self.end_headers() # self.wfile.write(b'<html><head><title>Python HTTP Server</title></head>') # self.wfile.write(b'<body><h1>Hello from a simple Python HTTP server!</h1></body></html>') # # def run_http_server(server_class=HTTPServer, handler_class=SimpleHTTPRequestHandler, port=8000): # server_address = ('', port) # httpd = server_class(server_address, handler_class) # print(f"Server starting on port {port}...") # httpd.serve_forever() # # if __name__ == '__main__': # # To run this, uncomment the lines above and run this specific file. # # It will block, so usually you run it in a dedicated script/process. # # run_http_server() print("HTTP server example (conceptual).")
subprocess
- 子进程管理import subprocess print("Running 'ls -l' (or 'dir' on Windows):") # For cross-platform compatibility, use shell=True might be simpler for simple commands, # but it's generally safer to pass commands as a list for subprocess.run. try: result_sub = subprocess.run(['ls', '-l'], capture_output=True, text=True, check=True) print(result_sub.stdout) except FileNotFoundError: try: # Try 'dir' for Windows result_sub = subprocess.run(['cmd', '/c', 'dir'], capture_output=True, text=True, check=True) print(result_sub.stdout) except Exception as e: print(f"Command execution failed: {e}") except subprocess.CalledProcessError as e: print(f"Command failed with error: {e.stderr}")
socket
- 低级网络接口import socket s_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Create a TCP/IP socket print(f"Created socket: {s_sock.family}, {s_sock.type}") s_sock.close()
threading
- 基于线程的并行import threading def worker_thread(): print("Worker thread executing") thread = threading.Thread(target=worker_thread) thread.start() thread.join() # Wait for the thread to complete print("Main thread finished.")
`multiprocessing` - 基于进程的并行(注意:在默认使用 spawn 启动方式的平台(Windows、macOS)上,创建进程的代码需要放在 if __name__ == '__main__': 保护之下,否则子进程重新导入模块时会报错。)
from multiprocessing import Process
def worker_process():
    print("Worker process executing")
p = Process(target=worker_process)
p.start()
p.join() # Wait for the process to complete
print("Main process finished.")
argparse
- 命令行选项、参数和子命令的解析器import argparse # parser_arg = argparse.ArgumentParser(description="Process some integers.") # parser_arg.add_argument('integers', metavar='N', type=int, nargs='+', # help='an integer for the accumulator') # args_arg = parser_arg.parse_args([]) # Pass empty list to avoid sys.argv parsing in example # print(f"Argparse example (conceptual): {args_arg}") print("Argparse example (conceptual).")
logging
- 日志工具import logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logging.warning('This is a warning message') logging.info('This is an info message')
unittest
- 单元测试框架import unittest class TestStringMethods(unittest.TestCase): def test_upper(self): self.assertEqual('foo'.upper(), 'FOO') def test_split(self): s = 'hello world' self.assertEqual(s.split(), ['hello', 'world']) with self.assertRaises(TypeError): s.split(2) # To run tests: unittest.main(argv=['first-arg-is-ignored'], exit=False) # Or typically: python -m unittest your_test_file.py print("Unittest example (conceptual).")
pathlib
- 面向对象的文件系统路径from pathlib import Path p_path = Path('.') # Current directory print(f"Current path (Path object): {p_path.resolve()}")
functools
- 高阶函数和可调用对象操作from functools import lru_cache @lru_cache(maxsize=None) # Cache results of fib(n) def fib(n): if n < 2: return n return fib(n-1) + fib(n-2) print(f"Fib(10) using lru_cache: {fib(10)}")
`collections` - 容器数据类型
from collections import Counter, defaultdict
c = Counter('hello world')
print(f"Counter: {c}")
dd = defaultdict(int)
dd['a'] += 1
print(f"Defaultdict: {dd}")
itertools
- 用于高效循环的迭代器创建函数import itertools print("Combinations of 'ABCD', 2 elements:") for combination in itertools.combinations('ABCD', 2): print(combination, end=' ') print()
hashlib
- 安全哈希和消息摘要算法import hashlib hash_object = hashlib.sha256(b'Hello World') hex_dig = hash_object.hexdigest() print(f"SHA256 of 'Hello World': {hex_dig}")
csv
- CSV 文件读写import csv # Example writing to CSV data_to_write = [['Name', 'Age'], ['Alice', 30], ['Bob', 24]] with open('output_example.csv', 'w', newline='', encoding='utf-8') as outfile: writer = csv.writer(outfile) writer.writerows(data_to_write) print("Data written to output_example.csv.") # Example reading from CSV with open('output_example.csv', mode='r', encoding='utf-8') as infile: reader = csv.reader(infile) for row in reader: print(f"CSV row: {row}")
xml.etree.ElementTree
- ElementTree XML APIimport xml.etree.ElementTree as ET # Create a simple XML structure for demonstration root_xml = ET.Element("data") item_xml = ET.SubElement(root_xml, "item") item_xml.set("name", "apple") item_xml.text = "Red" tree = ET.ElementTree(root_xml) # tree.write("output_example.xml") # Uncomment to write to file print("XML ElementTree example (conceptual).")
sqlite3
- SQLite 数据库的 DB-API 2.0 接口import sqlite3 conn_sqlite = sqlite3.connect(':memory:') # In-memory database cursor_sqlite = conn_sqlite.cursor() cursor_sqlite.execute("CREATE TABLE users (id INTEGER, name TEXT)") cursor_sqlite.execute("INSERT INTO users VALUES (?, ?)", (1, 'Alice')) conn_sqlite.commit() cursor_sqlite.execute("SELECT * FROM users") print(f"SQLite query result: {cursor_sqlite.fetchall()}") conn_sqlite.close()
tkinter
- GUI 工具包import tkinter as tk # root_tk = tk.Tk() # root_tk.title("Tkinter Example") # label_tk = tk.Label(root_tk, text="Hello, Tkinter!") # label_tk.pack() # root_tk.mainloop() # This will open a GUI window print("Tkinter GUI example (conceptual, requires GUI environment).")
pickle
- Python 对象序列化import pickle obj_to_pickle = {'a': 1, 'b': [2, 3]} serialized_obj = pickle.dumps(obj_to_pickle) print(f"Pickled object (bytes): {serialized_obj}") deserialized_obj = pickle.loads(serialized_obj) print(f"Unpickled object: {deserialized_obj}")
io
- 处理流的核心工具from io import StringIO f_io = StringIO("some initial text data") content_io = f_io.read() print(f"StringIO content: '{content_io}'") f_io.close()
time
- 时间访问和转换import time start_time = time.time() time.sleep(0.1) # Sleep for 0.1 second end_time = time.time() print(f"Time slept: {end_time - start_time:.4f} seconds")
calendar
- 通用日历相关函数import calendar print("\nCalendar for January 2023:") print(calendar.month(2023, 1)) # Print the calendar for January 2023
queue
- 同步队列类from queue import Queue q = Queue() q.put("item1") q.put("item2") print(f"Queue size: {q.qsize()}") print(f"Got from queue: {q.get()}")
shutil
- 高级文件操作import shutil # Create a dummy file for copy with open('source_shutil.txt', 'w') as f: f.write("This is a source file.") shutil.copyfile('source_shutil.txt', 'dest_shutil.txt') print("File copied using shutil.copyfile.") os.remove('source_shutil.txt') os.remove('dest_shutil.txt')
glob
- Unix 风格路径名模式扩展import glob # Create dummy files with open('file1.txt', 'w') as f: f.write('') with open('file2.py', 'w') as f: f.write('') print("Files matching '*.txt':") for file in glob.glob("*.txt"): print(file) os.remove('file1.txt') os.remove('file2.py')
tempfile
- 生成临时文件和目录import tempfile with tempfile.TemporaryFile(mode='w+') as temp_f: temp_f.write("temporary data") temp_f.seek(0) print(f"Temp file content: {temp_f.read()}") # The file is automatically deleted when it's closed or the context manager exits.
bz2
- Bzip2 压缩支持import bz2 compressed_bz2 = bz2.compress(b'your data here to compress with bz2') print(f"BZ2 compressed data length: {len(compressed_bz2)}") decompressed_bz2 = bz2.decompress(compressed_bz2) print(f"BZ2 decompressed data: {decompressed_bz2}")
gzip
- Gzip 压缩支持import gzip with gzip.open('file.txt.gz', 'wt', encoding='utf-8') as f: f.write('your data here to compress with gzip') print("Gzip file created.") with gzip.open('file.txt.gz', 'rt', encoding='utf-8') as f: print(f"Content from gzip file: {f.read()}") os.remove('file.txt.gz')
ssl
- 套接字对象的 TLS/SSL 包装器import ssl # context = ssl.create_default_context() # wrapped_sock = context.wrap_socket(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) print("SSL wrapping example (conceptual).")
imaplib
- IMAP4 协议客户端import imaplib # mail = imaplib.IMAP4_SSL('imap.example.com') # mail.login('user', 'password') print("IMAPlib example (conceptual).")
smtplib
- SMTP 协议客户端import smtplib # server_smtp = smtplib.SMTP('smtp.example.com', 587) # server_smtp.starttls() # server_smtp.login('user', 'password') print("SMTPlib example (conceptual).")
email
- 管理电子邮件消息from email.message import EmailMessage msg_email = EmailMessage() msg_email['Subject'] = 'Test Email' msg_email['From'] = '[email protected]' msg_email['To'] = '[email protected]' msg_email.set_content('This is a test email body.') print(f"Email message headers:\n{msg_email.as_string()}")
base64
- Base16, Base32, Base64, Base85 数据编码import base64 encoded_data = base64.b64encode(b'data to encode') print(f"Base64 encoded: {encoded_data}") decoded_data = base64.b64decode(encoded_data) print(f"Base64 decoded: {decoded_data}")
difflib
- 计算 Delta 的助手import difflib text1 = 'one\ntwo\nthree\n'.splitlines(keepends=True) text2 = 'ore\ntree\nemu\n'.splitlines(keepends=True) diff = difflib.ndiff(text1, text2) print("\nDiff using difflib:") print(''.join(diff))
gettext
- 多语言国际化服务import gettext # gettext.install('myapp', '/path/to/locales') # _ = gettext.gettext # print(_("Hello World")) print("Gettext example (conceptual for i18n).")
locale
- 国际化服务import locale try: # locale.setlocale(locale.LC_ALL, 'zh_CN.UTF-8') # Example for Chinese locale print(f"Current locale: {locale.getlocale()}") except locale.Error as e: print(f"Could not set locale: {e}")
secrets
- 生成安全的随机数以管理秘密import secrets secure_token = secrets.token_hex(16) # Generates a 32-character hex string print(f"Secure token: {secure_token}")
uuid
- 符合 RFC 4122 的 UUID 对象import uuid unique_id = uuid.uuid4() # Generates a random UUID print(f"Unique ID (UUID): {unique_id}")
html
- 超文本标记语言支持import html escaped = html.escape('<a href="https://example.com">link & symbol</a>') print(f"HTML escaped: {escaped}")
ftplib
- FTP 协议客户端from ftplib import FTP # ftp = FTP('ftp.example.com') # ftp.login('user', 'password') print("FTPlib example (conceptual).")
tarfile
- 读写 Tar 归档文件import tarfile import os # Create a dummy file for archiving with open('sample_tar.txt', 'w') as f: f.write('This is a test file for tar archiving.') with tarfile.open('sample.tar.gz', 'w:gz') as tar: tar.add('sample_tar.txt') print("Tar.gz archive created.") os.remove('sample_tar.txt') os.remove('sample.tar.gz')
推荐阅读
关于
关注我获取更多资讯

