COMP5313 Assignment 2 - Task 3 题解

Assignment 2 - Task 3:Network Models

题目理解

Task 3 要求实现并分析三种经典网络模型: 1. Erdős-Rényi (ER) 模型:随机网络 2. Watts-Strogatz (WS) 模型:小世界网络 3. Barabási-Albert (BA) 模型:无标度网络

核心目标: - 实现三种网络的生成算法 - 分析度分布、聚类系数、平均路径长度等统计特征 - 与真实网络进行对比,验证理论预测 - 可视化网络结构特征


关键概念回顾

1. Erdős-Rényi (ER) 模型 \(G(n, p)\)

定义: \(n\) 个节点,每对节点以概率 \(p\) 独立连边。

性质: - 度分布:二项分布 \(\binom{n-1}{k} p^k (1-p)^{n-1-k}\),近似泊松分布(\(n\) 大时) - 聚类系数\(C = p = \frac{\langle k \rangle}{n-1}\) - 平均路径长度\(L \sim \frac{\ln n}{\ln \langle k \rangle}\) - 巨分量:当 \(\langle k \rangle > 1\)(即 \(p > \frac{1}{n}\))时出现

相变现象: - \(p < \frac{1}{n}\):网络由孤立小分量组成 - \(p = \frac{1}{n}\):临界阈值,出现巨分量 - \(p > \frac{\ln n}{n}\):网络几乎必然连通

2. Watts-Strogatz (WS) 模型

定义: 从规则格点开始,以概率 \(p\) 重新连边。

算法: 1. 创建 \(n\) 个节点的环,每个节点连 \(k\) 个最近邻居 2. 以概率 \(p\) 将每条边的一个端点随机重连到另一节点

性质: - 聚类系数\(C(p) \approx \frac{3(k-2)}{4(k-1)}(1-p)^3\)(高聚类) - 平均路径长度\(L(p)\)\(p\) 增加快速下降(小世界特性) - 度分布:近似泊松,集中在 \(k\) 附近

3. Barabási-Albert (BA) 模型

定义: 优先连接(Preferential Attachment)模型。

算法: 1. 从 \(m_0\) 个节点开始 2. 每次添加一个新节点,连接 \(m\) 条边 3. 连接到现有节点 \(i\) 的概率:\(\Pi(k_i) = \frac{k_i}{\sum_j k_j}\)

性质: - 度分布:幂律分布 \(P(k) \sim k^{-\gamma}\)\(\gamma = 3\) - 聚类系数\(C \sim \frac{(\ln N)^2}{N}\)(随 \(N\) 缓慢减小) - 平均路径长度\(L \sim \frac{\ln N}{\ln \ln N}\)(超小世界) - 无标度特性:缺乏特征尺度,存在高度数枢纽节点(hub)


Python 实现

基础网络生成

import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from collections import Counter

# ==================== ER Model ====================
def generate_er(n, p, seed=None):
"""
Erdős-Rényi 随机网络 G(n, p)
"""
return nx.erdos_renyi_graph(n, p, seed=seed)

# ==================== WS Model ====================
def generate_ws(n, k, p, seed=None):
"""
Watts-Strogatz 小世界网络
n: 节点数
k: 每个节点的邻居数(必须为偶数)
p: 重连概率
"""
return nx.watts_strogatz_graph(n, k, p, seed=seed)

# ==================== BA Model ====================
def generate_ba(n, m, seed=None):
"""
Barabási-Albert 无标度网络
n: 最终节点数
m: 每个新节点连接的边数
"""
return nx.barabasi_albert_graph(n, m, seed=seed)

# 自定义 BA 实现(用于理解算法细节)
def generate_ba_custom(n, m, seed=None):
"""
手动实现 BA 模型,展示优先连接机制
"""
if seed is not None:
np.random.seed(seed)

# 初始完全图 K_m
G = nx.complete_graph(m)

# 逐步添加节点
for new_node in range(m, n):
# 计算现有节点的度
degrees = np.array([G.degree(i) for i in range(new_node)])

# 优先连接概率
probs = degrees / degrees.sum()

# 选择 m 个目标节点(无放回)
targets = np.random.choice(new_node, size=m, replace=False, p=probs)

# 添加边
for target in targets:
G.add_edge(new_node, target)

return G

统计特征计算

def compute_network_stats(G, name="Network"):
"""
计算网络的核心统计特征
"""
n = G.number_of_nodes()
m = G.number_of_edges()

# 基础统计
degrees = [d for n, d in G.degree()]
avg_degree = np.mean(degrees)
max_degree = max(degrees)
min_degree = min(degrees)

# 聚类系数
avg_clustering = nx.average_clustering(G)

# 平均路径长度(仅对连通分量计算)
if nx.is_connected(G):
avg_path_length = nx.average_shortest_path_length(G)
diameter = nx.diameter(G)
else:
# 取最大连通分量
largest_cc = max(nx.connected_components(G), key=len)
subgraph = G.subgraph(largest_cc)
avg_path_length = nx.average_shortest_path_length(subgraph)
diameter = nx.diameter(subgraph)

stats = {
'name': name,
'n_nodes': n,
'n_edges': m,
'avg_degree': avg_degree,
'max_degree': max_degree,
'min_degree': min_degree,
'density': nx.density(G),
'clustering': avg_clustering,
'avg_path_length': avg_path_length,
'diameter': diameter,
'degrees': degrees
}

return stats


def compare_models(n=1000):
"""
生成三种网络并比较统计特征
"""
# 参数设置(保证可比性,平均度约为 4)
p_er = 4 / (n - 1) # ER: <k> = (n-1)p = 4
k_ws = 4 # WS: 每个节点连 4 个邻居
p_ws = 0.1 # WS: 10% 重连
m_ba = 2 # BA: 每个新节点连 2 条边 -> <k> = 2m = 4

# 生成网络
G_er = generate_er(n, p_er, seed=42)
G_ws = generate_ws(n, k_ws, p_ws, seed=42)
G_ba = generate_ba(n, m_ba, seed=42)

# 计算统计
stats_er = compute_network_stats(G_er, "ER")
stats_ws = compute_network_stats(G_ws, "WS")
stats_ba = compute_network_stats(G_ba, "BA")

# 打印对比表
print(f"{'Metric':<20} {'ER':>12} {'WS':>12} {'BA':>12}")
print("=" * 60)
metrics = [
('Nodes', 'n_nodes', '{:.0f}'),
('Edges', 'n_edges', '{:.0f}'),
('Avg Degree', 'avg_degree', '{:.2f}'),
('Max Degree', 'max_degree', '{:.0f}'),
('Clustering', 'clustering', '{:.4f}'),
('Avg Path Len', 'avg_path_length', '{:.2f}'),
('Diameter', 'diameter', '{:.0f}'),
('Density', 'density', '{:.5f}')
]

for label, key, fmt in metrics:
print(f"{label:<20} {fmt.format(stats_er[key]):>12} "
f"{fmt.format(stats_ws[key]):>12} {fmt.format(stats_ba[key]):>12}")

return stats_er, stats_ws, stats_ba

# 运行对比
stats_er, stats_ws, stats_ba = compare_models(n=1000)

度分布分析

def plot_degree_distribution(stats_list, names, colors, figsize=(15, 5)):
"""
绘制度分布对比(线性坐标和对数坐标)
"""
fig, axes = plt.subplots(1, 3, figsize=figsize)

for idx, (stats, name, color) in enumerate(zip(stats_list, names, colors)):
degrees = stats['degrees']

# 计算度分布
degree_counts = Counter(degrees)
k_values = sorted(degree_counts.keys())
p_k = [degree_counts[k] / len(degrees) for k in k_values]

# 子图 1: 线性坐标
axes[0].plot(k_values, p_k, 'o-', color=color, label=name, alpha=0.7)

# 子图 2: 对数坐标(适合 ER/WS)
axes[1].semilogy(k_values, p_k, 'o-', color=color, label=name, alpha=0.7)

# 子图 3: 双对数坐标(适合 BA 的幂律)
# 过滤掉 0 值
k_filtered = [k for k, p in zip(k_values, p_k) if p > 0]
p_filtered = [p for p in p_k if p > 0]
axes[2].loglog(k_filtered, p_filtered, 'o', color=color, label=name, alpha=0.7)

axes[0].set_xlabel('Degree $k$')
axes[0].set_ylabel('$P(k)$')
axes[0].set_title('Linear Scale')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

axes[1].set_xlabel('Degree $k$')
axes[1].set_ylabel('$P(k)$')
axes[1].set_title('Semi-log Scale')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

axes[2].set_xlabel('Degree $k$')
axes[2].set_ylabel('$P(k)$')
axes[2].set_title('Log-log Scale')
axes[2].legend()
axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('degree_distribution_comparison.png', dpi=300)
plt.show()


# 运行度分布对比
plot_degree_distribution(
[stats_er, stats_ws, stats_ba],
['ER', 'WS', 'BA'],
['#3498db', '#2ecc71', '#e74c3c']
)

BA 模型的幂律验证

def fit_power_law(degrees, xmin=1):
"""
拟合幂律分布,估计指数 gamma
"""
# 筛选大于 xmin 的度值
filtered = [d for d in degrees if d >= xmin]

# 最大似然估计
n = len(filtered)
gamma = 1 + n / sum(np.log(d / (xmin - 0.5)) for d in filtered)

return gamma


def plot_ba_power_law(G, n_bins=50):
"""
可视化 BA 网络的幂律特性
"""
degrees = [d for n, d in G.degree()]

# 计算度分布
degree_counts = Counter(degrees)
k_values = np.array(sorted(degree_counts.keys()))
p_k = np.array([degree_counts[k] for k in k_values]) / len(degrees)

# 拟合幂律
gamma_fit = fit_power_law(degrees, xmin=5)
print(f"Fitted power-law exponent: γ = {gamma_fit:.2f}")
print(f"Theoretical value: γ = 3.00")

# 绘制
plt.figure(figsize=(10, 7))
plt.loglog(k_values, p_k, 'bo', alpha=0.6, label='Data')

# 理论曲线
k_theory = np.logspace(np.log10(min(k_values[k_values > 0])),
np.log10(max(k_values)), 100)
# 归一化常数
C = p_k[0] * k_values[0]**gamma_fit
p_theory = C * k_theory**(-gamma_fit)
plt.loglog(k_theory, p_theory, 'r--', linewidth=2,
label=f'Fit: $P(k) \\sim k^{{-{gamma_fit:.2f}}}$')

# 理论值 γ=3
C3 = p_k[0] * k_values[0]**3
p_theory_3 = C3 * k_theory**(-3)
plt.loglog(k_theory, p_theory_3, 'g:', linewidth=2,
label='Theory: $P(k) \\sim k^{-3}$')

plt.xlabel('Degree $k$', fontsize=12)
plt.ylabel('$P(k)$', fontsize=12)
plt.title('BA Model: Power-Law Degree Distribution', fontsize=14)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('ba_power_law.png', dpi=300)
plt.show()


# 测试不同规模的 BA 网络
for n in [500, 1000, 5000]:
G = generate_ba(n, m=2, seed=42)
gamma = fit_power_law([d for _, d in G.degree()], xmin=5)
print(f"n={n:5d}: γ = {gamma:.3f}")

WS 模型的小世界特性

def analyze_ws_transition(n=1000, k=10):
"""
分析 WS 模型从规则到随机的过渡
"""
p_values = np.logspace(-4, 0, 50) # 0.0001 到 1

C_values = [] # 聚类系数
L_values = [] # 平均路径长度

for p in p_values:
G = generate_ws(n, k, p, seed=42)
C = nx.average_clustering(G)

# 计算最大连通分量的平均路径长度
largest_cc = max(nx.connected_components(G), key=len)
subgraph = G.subgraph(largest_cc)
if len(largest_cc) > 1:
L = nx.average_shortest_path_length(subgraph)
else:
L = 0

C_values.append(C)
L_values.append(L)

# 归一化(相对于 p=0 的值)
C_0 = C_values[0] if C_values[0] > 0 else 1
L_0 = L_values[0] if L_values[0] > 0 else 1

C_normalized = [c / C_0 for c in C_values]
L_normalized = [l / L_0 for l in L_values]

# 绘图
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# 左图:归一化值
ax1.semilogx(p_values, C_normalized, 'o-', color='#3498db',
label='C(p) / C(0)', markersize=4)
ax1.semilogx(p_values, L_normalized, 's-', color='#e74c3c',
label='L(p) / L(0)', markersize=4)
ax1.axvline(x=0.1, color='gray', linestyle='--', alpha=0.5, label='p=0.1')
ax1.set_xlabel('Rewiring probability $p$', fontsize=12)
ax1.set_ylabel('Normalized value', fontsize=12)
ax1.set_title('WS Model: Small-World Transition', fontsize=14)
ax1.legend()
ax1.grid(True, alpha=0.3)

# 右图:C/L 比值(小世界区域)
ratio = [c / l if l > 0 else 0 for c, l in zip(C_normalized, L_normalized)]
ax2.semilogx(p_values, ratio, '^-', color='#2ecc71', markersize=4)
ax2.axvline(x=0.1, color='gray', linestyle='--', alpha=0.5)
ax2.set_xlabel('Rewiring probability $p$', fontsize=12)
ax2.set_ylabel('C(p)/L(p) relative to p=0', fontsize=12)
ax2.set_title('Small-World Region (high C, low L)', fontsize=14)
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('ws_small_world_transition.png', dpi=300)
plt.show()


# 运行分析
analyze_ws_transition(n=1000, k=10)

网络可视化

def visualize_networks(n=100, save_path=None):
"""
可视化三种网络的结构对比
"""
# 参数
p_er = 4 / (n - 1)
k_ws = 4
p_ws = 0.1
m_ba = 2

# 生成网络
G_er = generate_er(n, p_er, seed=42)
G_ws = generate_ws(n, k_ws, p_ws, seed=42)
G_ba = generate_ba(n, m_ba, seed=42)

networks = [
(G_er, 'Erdős-Rényi\nRandom Network', '#3498db'),
(G_ws, 'Watts-Strogatz\nSmall-World Network', '#2ecc71'),
(G_ba, 'Barabási-Albert\nScale-Free Network', '#e74c3c')
]

fig, axes = plt.subplots(1, 3, figsize=(18, 6))

for idx, (G, title, color) in enumerate(networks):
ax = axes[idx]

# 布局
if idx == 2: # BA 用 spring layout 更好展示 hub 结构
pos = nx.spring_layout(G, seed=42, k=0.5)
else:
pos = nx.spring_layout(G, seed=42)

# 节点大小根据度调整
degrees = dict(G.degree())
node_sizes = [degrees[node] * 30 + 50 for node in G.nodes()]

# 绘制
nx.draw_networkx_nodes(G, pos, node_size=node_sizes,
node_color=color, alpha=0.7, ax=ax)
nx.draw_networkx_edges(G, pos, alpha=0.3, width=0.8, ax=ax)

# 统计
stats_text = f"N={G.number_of_nodes()}, E={G.number_of_edges()}\n"
stats_text += f"⟨k⟩={np.mean(list(degrees.values())):.1f}\n"
stats_text += f"C={nx.average_clustering(G):.3f}"

ax.set_title(title, fontsize=14, pad=10)
ax.text(0.5, 0.02, stats_text, transform=ax.transAxes,
ha='center', fontsize=10,
bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
ax.axis('off')

plt.tight_layout()
if save_path:
plt.savefig(save_path, dpi=300, bbox_inches='tight')
plt.show()


# 生成可视化
visualize_networks(n=100, save_path='network_models_visualization.png')

理论验证

验证 1: ER 网络的相变

def verify_er_phase_transition(n=1000, n_p=50):
"""
验证 ER 网络的连通性相变
"""
p_values = np.linspace(0, 0.01, n_p) # 关注临界区域

largest_cc_sizes = []
avg_path_lengths = []

for p in p_values:
G = generate_er(n, p, seed=42)

# 最大连通分量大小
largest_cc = max(nx.connected_components(G), key=len)
largest_cc_sizes.append(len(largest_cc) / n) # 占比

# 平均路径长度(在最大分量上)
if len(largest_cc) > 1:
subgraph = G.subgraph(largest_cc)
try:
L = nx.average_shortest_path_length(subgraph)
avg_path_lengths.append(L)
except:
avg_path_lengths.append(np.nan)
else:
avg_path_lengths.append(np.nan)

# 绘图
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# 理论临界值 p_c = 1/n
p_c = 1 / n

ax1.plot(p_values, largest_cc_sizes, 'b-', linewidth=2)
ax1.axvline(x=p_c, color='r', linestyle='--', label=f'$p_c = 1/n = {p_c:.4f}$')
ax1.set_xlabel('$p$', fontsize=12)
ax1.set_ylabel('Fraction of nodes in largest component', fontsize=12)
ax1.set_title('ER Network: Phase Transition', fontsize=14)
ax1.legend()
ax1.grid(True, alpha=0.3)

ax2.plot(p_values, avg_path_lengths, 'g-', linewidth=2)
ax2.axvline(x=p_c, color='r', linestyle='--', label=f'$p_c = {p_c:.4f}$')
ax2.set_xlabel('$p$', fontsize=12)
ax2.set_ylabel('Average path length', fontsize=12)
ax2.set_title('Average Path Length near Critical Point', fontsize=14)
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('er_phase_transition.png', dpi=300)
plt.show()


verify_er_phase_transition(n=1000)

验证 2: BA 模型的度分布幂律

def verify_ba_degree_distribution(n=10000, m=2):
"""
详细验证 BA 模型的度分布
"""
G = generate_ba(n, m, seed=42)
degrees = [d for _, d in G.degree()]

# 累积分布函数 (CDF)
degree_counts = Counter(degrees)
k_values = sorted(degree_counts.keys())

# P(K > k)
cdf_complement = []
total = len(degrees)
cumulative = 0
for k in reversed(k_values):
cumulative += degree_counts[k]
cdf_complement.append((k, cumulative / total))
cdf_complement.reverse()

k_ccdf, p_ccdf = zip(*cdf_complement)

# 线性回归估计幂律指数
log_k = np.log(k_ccdf[10:100]) # 避开低度噪声和高度截断
log_p = np.log(p_ccdf[10:100])
slope, intercept, r_value, _, _ = stats.linregress(log_k, log_p)

gamma_estimated = -slope + 1
print(f"Estimated γ from CCDF slope: {gamma_estimated:.3f}")
print(f"R² = {r_value**2:.4f}")

# 绘图
plt.figure(figsize=(10, 7))
plt.loglog(k_ccdf, p_ccdf, 'bo', alpha=0.5, label='Empirical CCDF')

# 拟合线
k_fit = np.logspace(np.log10(min(k_ccdf)), np.log10(max(k_ccdf)), 100)
p_fit = np.exp(intercept) * k_fit**slope
plt.loglog(k_fit, p_fit, 'r--', linewidth=2,
label=f'Fit: slope = {slope:.2f}, γ = {gamma_estimated:.2f}')

# 理论线 γ=3
k_theory = np.logspace(np.log10(5), np.log10(200), 100)
C_theory = p_ccdf[5] * k_ccdf[5]**2 # 归一化
p_theory = C_theory * k_theory**(-2) # CCDF slope = -(γ-1) = -2
plt.loglog(k_theory, p_theory, 'g:', linewidth=2, label='Theory (γ=3)')

plt.xlabel('Degree $k$', fontsize=12)
plt.ylabel('$P(K > k)$', fontsize=12)
plt.title('BA Model: Cumulative Degree Distribution', fontsize=14)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('ba_ccdf.png', dpi=300)
plt.show()


verify_ba_degree_distribution(n=10000, m=2)

完整实验流程

def run_complete_experiment():
"""
运行完整的 Task 3 实验
"""
print("=" * 60)
print("COMP5313 Assignment 2 - Task 3: Network Models")
print("=" * 60)

# 1. 生成网络
n = 1000
print(f"\n[1] Generating networks with n={n}...")

G_er = generate_er(n, p=4/(n-1), seed=42)
G_ws = generate_ws(n, k=4, p=0.1, seed=42)
G_ba = generate_ba(n, m=2, seed=42)

print(" ER: Random network generated")
print(" WS: Small-world network generated")
print(" BA: Scale-free network generated")

# 2. 计算统计特征
print("\n[2] Computing network statistics...")
stats_er = compute_network_stats(G_er, "ER")
stats_ws = compute_network_stats(G_ws, "WS")
stats_ba = compute_network_stats(G_ba, "BA")

# 3. 生成对比表
print("\n[3] Comparison Table:")
print("-" * 70)
print(f"{'Metric':<25} {'ER':>14} {'WS':>14} {'BA':>14}")
print("-" * 70)
print(f"{'Nodes':<25} {stats_er['n_nodes']:>14} {stats_ws['n_nodes']:>14} {stats_ba['n_nodes']:>14}")
print(f"{'Edges':<25} {stats_er['n_edges']:>14} {stats_ws['n_edges']:>14} {stats_ba['n_edges']:>14}")
print(f"{'Avg Degree':<25} {stats_er['avg_degree']:>14.2f} {stats_ws['avg_degree']:>14.2f} {stats_ba['avg_degree']:>14.2f}")
print(f"{'Clustering Coeff':<25} {stats_er['clustering']:>14.4f} {stats_ws['clustering']:>14.4f} {stats_ba['clustering']:>14.4f}")
print(f"{'Avg Path Length':<25} {stats_er['avg_path_length']:>14.2f} {stats_ws['avg_path_length']:>14.2f} {stats_ba['avg_path_length']:>14.2f}")
print(f"{'Max Degree':<25} {stats_er['max_degree']:>14} {stats_ws['max_degree']:>14} {stats_ba['max_degree']:>14}")
print("-" * 70)

# 4. 生成可视化
print("\n[4] Generating visualizations...")
visualize_networks(n=100, save_path='network_models.png')
plot_degree_distribution([stats_er, stats_ws, stats_ba],
['ER', 'WS', 'BA'],
['#3498db', '#2ecc71', '#e74c3c'])

# 5. BA 幂律验证
print("\n[5] Verifying BA power-law distribution...")
plot_ba_power_law(G_ba)

# 6. WS 小世界验证
print("\n[6] Analyzing WS small-world transition...")
analyze_ws_transition(n=1000, k=10)

# 7. 保存结果
print("\n[7] Saving results...")
with open('task3_results.txt', 'w') as f:
f.write("COMP5313 Assignment 2 - Task 3 Results\n")
f.write("=" * 50 + "\n\n")
for name, stats in [('ER', stats_er), ('WS', stats_ws), ('BA', stats_ba)]:
f.write(f"{name} Model:\n")
for key, value in stats.items():
if key not in ['degrees', 'name']:
f.write(f" {key}: {value}\n")
f.write("\n")

print("\n" + "=" * 60)
print("Experiment complete! Check generated files:")
print(" - network_models.png")
print(" - degree_distribution_comparison.png")
print(" - ba_power_law.png")
print(" - ws_small_world_transition.png")
print(" - task3_results.txt")
print("=" * 60)


if __name__ == '__main__':
run_complete_experiment()

评分要点

评分项 权重 检查点
算法实现 25% ER/WS/BA 正确实现,参数可调
度分布分析 25% 正确计算、可视化、理论对比
统计特征 20% 聚类系数、路径长度等完整分析
理论验证 20% 幂律验证、相变分析、小世界特性
报告质量 10% 图表清晰、解释充分

关键理论点: - ER: 相变现象 \(p_c = 1/n\) - WS: 小世界区域 \(0.01 < p < 0.1\) - BA: 幂律分布 \(P(k) \sim k^{-3}\),优先连接机制


Last Updated: 2026-04-19
Status: ✅ Complete