Linux下NAT功能的实现
本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝、转载,转载时请保持文档的完整性,严禁用于任何商业用途。 msn: yfydz_no1@hotmail.com 来源: http://yfydz.cublog.cn
目录
1. 前言 ................................ 1
2. NAT hook ............................ 1
3. NAT处理相关结构 ..................... 2
4. ip_nat_fn()函数 ..................... 3
5. do_bindings()函数 ................... 6
6. SNAT、DNAT目标函数 .................. 10
7. ip_nat_setup_info()函数 ............. 12
8. 结论 ................................ 16
1. 前言
在2.4/2.6内核的Linux中,防火墙代码netfilter支持源NAT(SNAT)和目的NAT(DNAT),基本可以满足各种类型的NAT需求。本文介绍Linux下NAT的具体实现过程,所引用的内核代码版本为2.4.26。NAT原理部分不在此介绍,有兴趣者可先看我的另一篇介绍NAT原理的文章。
2. NAT hook
NAT操作也是以netfilter节点形式挂接在相应的处理点上的,DNAT挂接在NF_IP_PRE_ROUTING点上,优先级高于FILTER低于MANGLE,表示在mangle表后处理,但在filter表前处理数据包;SNAT挂接在NF_IP_POST_ROUTING点上,优先级低于FILTER,表示在filter表后面处理数据包。
在net/ipv4/netfilter/ip_nat_standalone.c中:
目的NAT的hook节点:
/*
 * Before packet filtering, change destination.
 *
 * Hook entry for destination NAT: the initializer names ip_nat_fn as the
 * handler and lists PF_INET, NF_IP_PRE_ROUTING and NF_IP_PRI_NAT_DST.
 * NOTE(review): the initializer is positional; the member order of
 * struct nf_hook_ops is not visible here -- confirm against its declaration.
 */
static struct nf_hook_ops ip_nat_in_ops
= { { NULL, NULL }, ip_nat_fn, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_NAT_DST };
源NAT的hook节点:
/* After packet filtering, change source */ static struct nf_hook_ops ip_nat_out_ops
Linux
下nat的实现 1
= { { NULL, NULL }, ip_nat_out, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_NAT_SRC};
include/linux/netfilter_ipv4.h
/*
 * Priorities of the IPv4 netfilter hooks.
 *
 * Numerically smaller values are handled earlier at a hook point: DNAT
 * (-100) runs after the mangle table (-150) and before the filter table (0),
 * while SNAT (100) runs after the filter table.
 */
enum nf_ip_hook_priorities {
	NF_IP_PRI_FIRST = INT_MIN,
	NF_IP_PRI_CONNTRACK = -200,	/* connection tracking */
	NF_IP_PRI_MANGLE = -150,	/* mangle table */
	NF_IP_PRI_NAT_DST = -100,	/* DNAT */
	NF_IP_PRI_FILTER = 0,		/* filter table */
	NF_IP_PRI_NAT_SRC = 100,	/* SNAT */
	NF_IP_PRI_LAST = INT_MAX,
};
ip_nat_fn()是NAT hook的主处理函数,ip_nat_out()函数也是在数据合法性检查后调用ip_nat_fn()函数。
3. NAT处理相关结构
在连接跟踪结构struct ip_conntrack中包含了与NAT相关的结构(include/linux/netfilter_ipv4/ip_conntrack.h):
/*
 * Excerpt of the connection-tracking structure showing only its NAT part.
 * The NAT member exists only when CONFIG_IP_NF_NAT_NEEDED is defined.
 */
struct ip_conntrack {
	/* ...... (members unrelated to NAT are omitted in this excerpt) */

#ifdef CONFIG_IP_NF_NAT_NEEDED
	struct {
		/* NAT bookkeeping for this connection. */
		struct ip_nat_info info;
		/* Per-protocol NAT helper data. */
		union ip_conntrack_nat_help help;
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
		/* Present only when the MASQUERADE target is built in or as a module. */
		int masq_index;
#endif
	} nat;
#endif /* CONFIG_IP_NF_NAT_NEEDED */
};
其中比较重要的是struct ip_nat_info结构,而union ip_conntrack_nat_help是各协议NAT时需要特殊处理的结构描述,不过在2.4.26内核中都没定义,联合为空。
/* Size of the manips[] array in struct ip_nat_info; evaluates to 6. */
#define IP_NAT_MAX_MANIPS (2*3)
Linux
下nat的实现 2
// 此结构描述数据包中要修改部分的信息 struct ip_nat_info_manip {
/* The direction. */ u_int8_t direction;
/* Which hook the manipulation happens on. */ u_int8_t hooknum;
/* The manipulation type. */
u_int8_t maniptype; // 修改类型: SNAT / DNAT
// 连接的数据包要修改的信息,包括地址和上层的协议信息 /* Manipulations to occur at each conntrack in this dirn. */ struct ip_conntrack_manip manip; };
/* The structure embedded in the conntrack structure. */
struct ip_nat_info {
	/*
	 * Set to zero when conntrack created: bitmask of maniptypes.
	 * At most two bits are actually used.
	 */
	int initialized;

	unsigned int num_manips;

	/*
	 * Manipulations to be done on this conntrack; at most
	 * IP_NAT_MAX_MANIPS (6) entries can be recorded.
	 */
	struct ip_nat_info_manip manips[IP_NAT_MAX_MANIPS];

	/* Hash-table entries keyed by address and protocol. */
	struct ip_nat_hash bysource, byipsproto;

	/* Helper (NULL if none); used when NATing multi-connection protocols. */
	struct ip_nat_helper *helper;

	/* Sequence-number changes for each of the two directions. */
	struct ip_nat_seq seq[IP_CT_DIR_MAX];
};
4. ip_nat_fn()函数
ip_nat_fn()是NAT hook的基本处理函数(net/ipv4/netfilter/ip_nat_standalone.c),目的是建立连接的NAT info信息, 并修改数据包中的相应部分。
static unsigned int
ip_nat_fn(unsigned int hooknum, struct sk_buff **pskb,
const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) {
Linux
下nat的实现 3
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo; struct ip_nat_info *info;
/* maniptype == SRC for postrouting. */
// 根据hooknum来确定进行哪种方式的NAT,netfilter在hook点是能进行哪种NAT是固定的:
// NF_IP_PRE_ROUTING点进行的是DNAT,maniptype=1 // NF_IP_POST_ROUTING点进行的是SNAT,maniptype=0 enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
/* We never see fragments: conntrack defrags on pre-routing and local-out, and ip_nat_out protects post-routing. */ IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET))); (*pskb)->nfcache |= NFC_UNKNOWN;
/* If we had a hardware checksum before, it's now invalid */
if ((*pskb)->ip_summed == CHECKSUM_HW) (*pskb)->ip_summed = CHECKSUM_NONE;
// 进行NAT的包必须都经过的连接跟踪处理,如果找不到该包对应的连接,不对其进行NAT处理
// 连接跟踪优先级最高,是数据包一进入netfilter就要进行处理的 ct = ip_conntrack_get(*pskb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would have dropped it. Hence it's the user's responsibilty to packet filter it out, or implement conntrack/NAT for that protocol. 8) --RR */
if (!ct) {
/* Exception: ICMP redirect to new connection (not in
hash table yet). We must not let this through, in case we're doing NAT to the same network. */ struct iphdr *iph = (*pskb)->nh.iph; struct icmphdr *hdr = (struct icmphdr *) ((u_int32_t *)iph + iph->ihl);
if (iph->protocol == IPPROTO_ICMP
&& hdr->type == ICMP_REDIRECT) return NF_DROP; return NF_ACCEPT; }
switch (ctinfo) {
//对于相关连接、相关连接的回复、新连接的包进行NAT信息的构建
Linux
下nat的实现 4
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { return icmp_reply_translation(*pskb, ct, hooknum, CTINFO2DIR(ctinfo)); }
/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ case IP_CT_NEW: info = &ct->nat.info;
WRITE_LOCK(&ip_nat_lock);
/* Seen it before? This can happen for loopback, retrans, or local packets.. */
// 检查是否已经进行相应方向的初始化,注意初始化可以是两个方向同时进行的
// 这就是说一个数据包可以同时修改源和目的, 这在服务器和内网在相同网段时会用到, // netfilter已经能自动处理这种情况,根本不需要进行修改,以前我的理解有误,以为 // 只能修改一个方向的数据
if (!(info->initialized & (1 << maniptype)) #ifndef CONFIG_IP_NF_NAT_LOCAL
/* If this session has already been confirmed we must not * touch it again even if there is no mapping set up. * Can only happen on local->local traffic with * CONFIG_IP_NF_NAT_LOCAL disabled. */
&& !(ct->status & IPS_CONFIRMED) #endif ) {
unsigned int ret; if (ct->master
&& master_ct(ct)->nat.info.helper
&& master_ct(ct)->nat.info.helper->expect) {
// 多连接协议情况, 如果是子连接, 调用主连接相关的expect函数处理填写NAT info信息 ret = call_expect(master_ct(ct), pskb, hooknum, ct, info); } else {
#ifdef CONFIG_IP_NF_NAT_LOCAL
/* LOCAL_IN hook doesn't have a chain! */ if (hooknum == NF_IP_LOCAL_IN) ret = alloc_null_binding(ct, info, hooknum); else #endif
// 否则根据NAT规则表查找规则, 执行规则的动作: SNAT或DNAT, 填写NAT info信息 ret = ip_nat_rule_find(pskb, hooknum, in, out,
Linux
下nat的实现 5