Module dkim
[hide private]
[frames | no frames]

Source Code for Module dkim

  1  # This software is provided 'as-is', without any express or implied 
  2  # warranty.  In no event will the author be held liable for any damages 
  3  # arising from the use of this software. 
  4  #  
  5  # Permission is granted to anyone to use this software for any purpose, 
  6  # including commercial applications, and to alter it and redistribute it 
  7  # freely, subject to the following restrictions: 
  8  #  
  9  # 1. The origin of this software must not be misrepresented; you must not 
 10  #    claim that you wrote the original software. If you use this software 
 11  #    in a product, an acknowledgment in the product documentation would be 
 12  #    appreciated but is not required. 
 13  # 2. Altered source versions must be plainly marked as such, and must not be 
 14  #    misrepresented as being the original software. 
 15  # 3. This notice may not be removed or altered from any source distribution. 
 16  #  
 17  # Copyright (c) 2008 Greg Hewgill http://hewgill.com 
 18   
 19  import base64 
 20  import hashlib 
 21  import re 
 22  import time 
 23   
 24  import dns.resolver 
 25   
 26  __all__ = [ 
 27      "Simple", 
 28      "Relaxed", 
 29      "InternalError", 
 30      "KeyFormatError", 
 31      "MessageFormatError", 
 32      "ParameterError", 
 33      "sign", 
 34      "verify", 
 35  ] 
class Simple:
    """Class that represents the "simple" canonicalization algorithm."""

    name = "simple"

    @staticmethod
    def canonicalize_headers(headers):
        """Return the headers exactly as given.

        @param headers: A list of (name, value) header pairs.
        @return: The very same list; nothing is copied or modified.

        """
        # No changes to headers.
        return headers

    @staticmethod
    def canonicalize_body(body):
        """Reduce the run of CRLFs at the end of the body to a single CRLF.

        @param body: The message body as a CRLF-separated string.
        @return: The body ending in exactly one CRLF (an empty body becomes CRLF).

        """
        # Ignore all empty lines at the end of the message body.
        # \Z anchors at the true end of the string ("$" would also match in
        # front of a final LF), and count=1 stops after the first replacement:
        # newer Python versions otherwise replace the empty match that follows
        # the trailing CRLFs as well, leaving two CRLFs behind.
        return re.sub(r"(\r\n)*\Z", "\r\n", body, count=1)
51
class Relaxed:
    """Class that represents the "relaxed" canonicalization algorithm."""

    name = "relaxed"

    @staticmethod
    def canonicalize_headers(headers):
        """Return canonicalized copies of the given headers.

        @param headers: A list of (name, value) header pairs.
        @return: A new list of (name, value) tuples; every value ends in CRLF.

        """
        # Convert all header field names to lowercase.
        # Unfold all header lines.
        # Compress WSP to single space.
        # Remove all WSP at the start or end of the field value (strip).
        result = []
        for x in headers:
            unfolded = re.sub("\r\n", "", x[1])
            value = re.sub(r"\s+", " ", unfolded).strip()
            result.append((x[0].lower(), value + "\r\n"))
        return result

    @staticmethod
    def canonicalize_body(body):
        """Return the body with whitespace and trailing empty lines normalized.

        @param body: The message body as a CRLF-separated string.
        @return: The canonicalized body, ending in exactly one CRLF.

        """
        # Remove all trailing WSP at end of lines.
        body = re.sub(r"[\x09\x20]+\r\n", "\r\n", body)
        # Compress non-line-ending WSP to single space.
        body = re.sub(r"[\x09\x20]+", " ", body)
        # Ignore all empty lines at the end of the message body.
        # \Z with count=1 performs exactly one replacement at the true end of
        # the string; without the limit, newer Python versions also replace
        # the empty match after the trailing CRLFs and emit two CRLFs.
        return re.sub(r"(\r\n)*\Z", "\r\n", body, count=1)
71
class DKIMException(Exception):
    """Common ancestor of every error class defined by the dkim module."""
75
class InternalError(DKIMException):
    """Signals an inconsistency inside the dkim module itself. Should never happen."""
79
class KeyFormatError(DKIMException):
    """Raised when an RSA public or private key cannot be parsed."""
83
class MessageFormatError(DKIMException):
    """Raised when a message is not in valid RFC822 format."""
87
class ParameterError(DKIMException):
    """Raised when a caller supplies an invalid input parameter."""
91
92 -def _remove(s, t):
93 i = s.find(t) 94 assert i >= 0 95 return s[:i] + s[i+len(t):]
# ASN.1 tag octets understood by asn1_parse() and asn1_build().
INTEGER = 0x02
BIT_STRING = 0x03
OCTET_STRING = 0x04
NULL = 0x05
OBJECT_IDENTIFIER = 0x06
SEQUENCE = 0x30

# Template used by verify() for the decoded p= value of the DNS key record:
# an (object identifier, NULL) sequence followed by a bit string.
ASN1_Object = [
    (SEQUENCE, [
        (SEQUENCE, [(OBJECT_IDENTIFIER,), (NULL,)]),
        (BIT_STRING,),
    ]),
]

# Template used by verify() for the key inside that bit string:
# modulus and public exponent.
ASN1_RSAPublicKey = [
    (SEQUENCE, [(INTEGER,), (INTEGER,)]),
]

# Template used by sign() for the private key: nine integers, which sign()
# names version, modulus, publicExponent, privateExponent, prime1, prime2,
# exponent1, exponent2 and coefficient.
ASN1_RSAPrivateKey = [
    (SEQUENCE, [(INTEGER,)] * 9),
]
def asn1_parse(template, data):
    """Parse a data structure according to ASN.1 template.

    @param template: A list of tuples comprising the ASN.1 template.
    @param data: A string of octets to parse.
    @return: A list with one entry per template element: an integer for
        INTEGER, the raw content octets for BIT STRING and OBJECT IDENTIFIER,
        None for NULL and a nested list for SEQUENCE.
    @raise KeyFormatError: If the data does not match the template, ends
        early, or declares a field longer than the data that is available.

    """

    def octet(pos):
        # Truncated input is a key format problem, not an IndexError.
        if pos >= len(data):
            raise KeyFormatError("Unexpected end of ASN.1 data")
        return ord(data[pos])

    r = []
    i = 0
    for t in template:
        tag = octet(i)
        i += 1
        if tag != t[0]:
            raise KeyFormatError("Unexpected tag (got %02x, expecting %02x)" % (tag, t[0]))
        length = octet(i)
        i += 1
        if length & 0x80:
            # Long form: the low seven bits give the number of length octets.
            n = length & 0x7f
            length = 0
            for j in range(n):
                length = (length << 8) | octet(i)
                i += 1
        # Slicing would silently shorten an over-long field, so check first.
        if i + length > len(data):
            raise KeyFormatError("ASN.1 field length exceeds available data")
        if tag == INTEGER:
            n = 0
            for j in range(length):
                n = (n << 8) | ord(data[i])
                i += 1
            r.append(n)
        elif tag == BIT_STRING:
            r.append(data[i:i+length])
            i += length
        elif tag == NULL:
            if length != 0:
                raise KeyFormatError("ASN.1 NULL with non-zero length")
            r.append(None)
        elif tag == OBJECT_IDENTIFIER:
            r.append(data[i:i+length])
            i += length
        elif tag == SEQUENCE:
            r.append(asn1_parse(t[1], data[i:i+length]))
            i += length
        else:
            raise KeyFormatError("Unexpected tag in template: %02x" % tag)
    return r
180
def asn1_length(n):
    """Return a string representing a field length in ASN.1 format.

    @param n: The length to encode; must not be negative.
    @return: A single octet for lengths below 128; otherwise an octet holding
        0x80 plus the number of length octets, followed by the length in
        big-endian order.
    @raise ValueError: If n is negative.

    """
    if n < 0:
        raise ValueError("ASN.1 length must not be negative")
    if n < 0x80:
        return chr(n)
    r = ""
    while n > 0:
        r = chr(n & 0xff) + r
        n >>= 8
    # The long form needs the leading count octet; without it a parser reads
    # the first length octet as the count.
    return chr(0x80 | len(r)) + r
191
def asn1_build(node):
    """Serialize a (type, data) pair, recursively, into ASN.1 octets.

    @param node: A tuple whose first item is the tag.  The second item is the
        content string for OCTET_STRING and OBJECT_IDENTIFIER, None for NULL,
        and a list of child nodes for SEQUENCE.
    @return: The tag octet, the encoded length and the content as one string.
    @raise InternalError: If the tag is not one of the four supported types.

    """
    tag = node[0]
    if tag in (OCTET_STRING, OBJECT_IDENTIFIER):
        payload = node[1]
        return chr(tag) + asn1_length(len(payload)) + payload
    if tag == NULL:
        assert node[1] is None
        return chr(NULL) + asn1_length(0)
    if tag == SEQUENCE:
        contents = "".join([asn1_build(child) for child in node[1]])
        return chr(SEQUENCE) + asn1_length(len(contents)) + contents
    raise InternalError("Unexpected tag in template: %02x" % tag)
# Object identifier contents for the supported hash algorithms, placed in the
# DigestInfo structure built by sign() and verify().
# These values come from RFC 3447, section 9.2 Notes, page 43.
HASHID_SHA1 = "".join(map(chr, (0x2b, 0x0e, 0x03, 0x02, 0x1a)))
HASHID_SHA256 = "".join(map(chr, (0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01)))
def str2int(s):
    """Interpret an octet string as a big-endian, non-negative integer.

    @param s: The octet string; the first character is the most significant.
    @return: The integer value; 0 for an empty string.

    """
    value = 0
    for octet in s:
        value = value * 256 + ord(octet)
    return value
219
def int2str(n, length = -1):
    """Convert an integer to a big-endian list of single-octet strings.

    Callers join the list with ''.join() when they need one string.

    @param n: Number to convert. Must not be negative.
    @param length: Exact number of octets to produce (padded with leading
        zero octets), or -1 to return the smallest number of octets that
        represent the integer.
    @return: A list of one-character strings, most significant octet first.
    @raise ValueError: If n is negative or does not fit in length octets.

    """

    # Explicit check: under "python -O" an assert is removed, and a negative
    # number never shifts down to zero, so the loop below would not end.
    if n < 0:
        raise ValueError("int2str requires a non-negative integer")
    r = []
    while length < 0 or len(r) < length:
        r.append(chr(n & 0xff))
        n >>= 8
        if length < 0 and n == 0: break
    # Anything left in n did not fit; refuse rather than drop the high octets.
    if length >= 0 and n != 0:
        raise ValueError("integer does not fit in %d octets" % length)
    r.reverse()
    return r
237
def rfc822_parse(message):
    """Parse a message in RFC822 format.

    @param message: The message in RFC822 format. Either CRLF or LF is an accepted line separator.

    @return: Returns a tuple of (headers, body) where headers is a list of
        [name, value] pairs. Each value keeps its folding and ends in CRLF.
        The body is a CRLF-separated string.
    @raise MessageFormatError: If a header line is malformed, or a
        continuation line appears before any header.

    """

    headers = []
    lines = re.split("\r?\n", message)
    i = 0
    while i < len(lines):
        line = lines[i]
        if len(line) == 0:
            # End of headers, return what we have plus the body, excluding the blank line.
            i += 1
            break
        if line[0] in "\x09\x20":
            # Continuation of the previous header; there must be one.
            if not headers:
                raise MessageFormatError("Continuation line without a preceding RFC822 header: %s" % line)
            headers[-1][1] += line+"\r\n"
        else:
            m = re.match(r"([\x21-\x7e]+?):", line)
            if m is not None:
                headers.append([m.group(1), line[m.end(0):]+"\r\n"])
            elif line.startswith("From "):
                # mbox-style "From " separator line; not a header, skip it.
                pass
            else:
                raise MessageFormatError("Unexpected characters in RFC822 header: %s" % line)
        i += 1
    return (headers, "\r\n".join(lines[i:]))
268
def dnstxt(name):
    """Return a TXT record associated with a DNS name.

    @param name: The DNS name to look up.
    @return: The strings of the first TXT record joined together, or None if
        the name does not exist or has no TXT record.

    """
    try:
        a = dns.resolver.query(name, dns.rdatatype.TXT)
    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
        # A missing name or record is the "no TXT record" case, not an error.
        return None
    for r in a.response.answer:
        if r.rdtype == dns.rdatatype.TXT:
            return "".join(r.items[0].strings)
    return None
276
def fold(header):
    """Fold a header line into multiple crlf-separated lines at column 72.

    Text up to and including the last existing fold (CRLF followed by a
    space) is left untouched; only the part after it is folded.

    @param header: The header text to fold.
    @return: The header with CRLF plus a space inserted so that, after the
        last existing fold, no line segment exceeds 72 characters.

    """
    i = header.rfind("\r\n ")
    if i == -1:
        pre = ""
    else:
        i += 3
        pre = header[:i]
        header = header[i:]
    while len(header) > 72:
        i = header[:72].rfind(" ")
        if i == -1:
            # No space to break at: cut at column 72.  Using rfind's -1 as a
            # slice index would split just before the last character.
            i = j = 72
        else:
            # Break at the space and drop it.
            j = i + 1
        pre += header[:i] + "\r\n "
        header = header[j:]
    return pre + header
295
def sign(message, selector, domain, privkey, identity=None, canonicalize=(Simple, Simple), include_headers=None, length=False, debuglog=None):
    """Sign an RFC822 message and return the DKIM-Signature header line.

    @param message: an RFC822 formatted message (with either \\n or \\r\\n line endings)
    @param selector: the DKIM selector value for the signature
    @param domain: the DKIM domain value for the signature
    @param privkey: a PKCS#1 private key in base64-encoded text form
    @param identity: the DKIM identity value for the signature (default "@"+domain)
    @param canonicalize: the canonicalization algorithms to use (default (Simple, Simple))
    @param include_headers: a list of strings indicating which headers are to be signed (default all headers)
    @param length: true if the l= tag should be included to indicate body length (default False)
    @param debuglog: a file-like object to which debug info will be written (default None)
    @return: the complete DKIM-Signature header line, terminated by CRLF
    @raise KeyFormatError: if the private key cannot be located or decoded
    @raise ParameterError: if identity does not end with domain, or the
        digest does not fit the key modulus

    """

    def debug(text):
        # Write one line of diagnostics when a debug log was supplied.
        if debuglog is not None:
            debuglog.write(text + "\n")

    (headers, body) = rfc822_parse(message)

    # The key data sits between the "-----BEGIN ..." and "-----END ..." lines;
    # accept both LF and CRLF line endings in the key text.
    m = re.search("--\r?\n(.*?)\r?\n--", privkey, re.DOTALL)
    if m is None:
        raise KeyFormatError("Private key not found")
    try:
        pkdata = base64.b64decode(m.group(1))
    except (TypeError, ValueError) as e:
        raise KeyFormatError(str(e))
    debug(" ".join("%02x" % ord(x) for x in pkdata))
    pka = asn1_parse(ASN1_RSAPrivateKey, pkdata)
    pk = {
        'version': pka[0][0],
        'modulus': pka[0][1],
        'publicExponent': pka[0][2],
        'privateExponent': pka[0][3],
        'prime1': pka[0][4],
        'prime2': pka[0][5],
        'exponent1': pka[0][6],
        'exponent2': pka[0][7],
        'coefficient': pka[0][8],
    }

    if identity is not None and not identity.endswith(domain):
        raise ParameterError("identity must end with domain")

    headers = canonicalize[0].canonicalize_headers(headers)

    if include_headers is None:
        include_headers = [x[0].lower() for x in headers]
    else:
        include_headers = [x.lower() for x in include_headers]
    sign_headers = [x for x in headers if x[0].lower() in include_headers]

    body = canonicalize[1].canonicalize_body(body)

    h = hashlib.sha256()
    h.update(body)
    bodyhash = base64.b64encode(h.digest())

    sigfields = [
        ('v', "1"),
        ('a', "rsa-sha256"),
        ('c', "%s/%s" % (canonicalize[0].name, canonicalize[1].name)),
        ('d', domain),
        ('i', identity or "@"+domain),
    ]
    if length:
        sigfields.append(('l', len(body)))
    sigfields += [
        ('q', "dns/txt"),
        ('s', selector),
        ('t', str(int(time.time()))),
        ('h', " : ".join(x[0] for x in sign_headers)),
        ('bh', bodyhash),
        ('b', ""),
    ]
    sig = "DKIM-Signature: " + "; ".join("%s=%s" % x for x in sigfields)

    sig = fold(sig)

    # verify() walks the h= list and, for each name, takes the bottom-most
    # header instance not used yet.  Hash the headers in that same order, or
    # messages with repeated header names can never verify.
    hash_headers = []
    lastindex = {}
    for selected in sign_headers:
        key = selected[0].lower()
        i = lastindex.get(key, len(headers))
        while i > 0:
            i -= 1
            if headers[i][0].lower() == key:
                hash_headers.append(headers[i])
                break
        lastindex[key] = i

    # The signature header itself (with an empty b= value) is hashed last,
    # canonicalized like every other header and without trailing whitespace,
    # which is what verify() reconstructs.  For the "simple" algorithm this
    # is the folded header text unchanged.
    name_part = "DKIM-Signature"
    sig_header = canonicalize[0].canonicalize_headers([(name_part, sig[len(name_part)+1:])])[0]
    sig_header = (sig_header[0], sig_header[1].rstrip())

    debug("sign headers: %s" % (hash_headers + [sig_header],))
    h = hashlib.sha256()
    for x in hash_headers + [sig_header]:
        h.update(x[0])
        h.update(":")
        h.update(x[1])
    d = h.digest()
    debug("sign digest: " + " ".join("%02x" % ord(x) for x in d))

    dinfo = asn1_build(
        (SEQUENCE, [
            (SEQUENCE, [
                (OBJECT_IDENTIFIER, HASHID_SHA256),
                (NULL, None),
            ]),
            (OCTET_STRING, d),
        ])
    )
    modlen = len(int2str(pk['modulus']))
    if len(dinfo)+3 > modlen:
        raise ParameterError("Hash too large for modulus")
    # Pad the digest info to the modulus length as 00 01 FF..FF 00 <dinfo>
    # and apply the RSA private key operation.
    padded = "\x00\x01"+"\xff"*(modlen-len(dinfo)-3)+"\x00"+dinfo
    sig2 = int2str(pow(str2int(padded), pk['privateExponent'], pk['modulus']), modlen)
    sig += base64.b64encode(''.join(sig2))

    return sig + "\r\n"
398
def verify(message, debuglog=None):
    """Verify a DKIM signature on an RFC822 formatted message.

    Only the first DKIM-Signature header found in the message is checked.

    @param message: an RFC822 formatted message (with either \\n or \\r\\n line endings)
    @param debuglog: a file-like object to which debug info will be written (default None)
    @return: True if the signature verifies; False if there is no signature,
        it is malformed, no usable key is published, or it does not match

    """

    def debug(text):
        # Write one line of diagnostics when a debug log was supplied.
        if debuglog is not None:
            debuglog.write(text + "\n")

    (headers, body) = rfc822_parse(message)

    sigheaders = [x for x in headers if x[0].lower() == "dkim-signature"]
    if len(sigheaders) < 1:
        return False

    # Currently, we only validate the first DKIM-Signature line found.

    a = re.split(r"\s*;\s*", sigheaders[0][1].strip())
    debug("a: %s" % (a,))
    sig = {}
    for x in a:
        if x:
            m = re.match(r"(\w+)\s*=\s*(.*)", x, re.DOTALL)
            if m is None:
                debug("invalid format of signature part: %s" % x)
                return False
            sig[m.group(1)] = m.group(2)
    debug("sig: %s" % (sig,))

    if 'v' not in sig:
        debug("signature missing v=")
        return False
    if sig['v'] != "1":
        debug("v= value is not 1 (%s)" % sig['v'])
        return False
    if 'a' not in sig:
        debug("signature missing a=")
        return False
    if 'b' not in sig:
        debug("signature missing b=")
        return False
    if re.match(r"[\s0-9A-Za-z+/]+=*$", sig['b']) is None:
        debug("b= value is not valid base64 (%s)" % sig['b'])
        return False
    if 'bh' not in sig:
        debug("signature missing bh=")
        return False
    if re.match(r"[\s0-9A-Za-z+/]+=*$", sig['bh']) is None:
        debug("bh= value is not valid base64 (%s)" % sig['bh'])
        return False
    if 'd' not in sig:
        debug("signature missing d=")
        return False
    if 'h' not in sig:
        debug("signature missing h=")
        return False
    if 'i' in sig:
        # i= must be d= preceded by "@" or "."; an i= that merely equals d=
        # has no such character and is rejected as well.
        ident = sig['i']
        dom = sig['d']
        if not ident.endswith(dom) or len(ident) == len(dom) or ident[-len(dom)-1] not in "@.":
            debug("i= domain is not a subdomain of d= (i=%s d=%s)" % (ident, dom))
            return False
    if 'l' in sig and re.match(r"\d{1,76}$", sig['l']) is None:
        debug("l= value is not a decimal integer (%s)" % sig['l'])
        return False
    if 'q' in sig and sig['q'] != "dns/txt":
        debug("q= value is not dns/txt (%s)" % sig['q'])
        return False
    if 's' not in sig:
        debug("signature missing s=")
        return False
    if 't' in sig and re.match(r"\d+$", sig['t']) is None:
        debug("t= value is not a decimal integer (%s)" % sig['t'])
        return False
    if 'x' in sig:
        if re.match(r"\d+$", sig['x']) is None:
            debug("x= value is not a decimal integer (%s)" % sig['x'])
            return False
        # t= is optional, so only compare the two when both are present.
        if 't' in sig and int(sig['x']) < int(sig['t']):
            debug("x= value is less than t= value (x=%s t=%s)" % (sig['x'], sig['t']))
            return False

    # c= is optional; a missing tag means simple/simple.
    c_value = sig.get('c', "simple/simple")
    m = re.match(r"(\w+)(?:/(\w+))?$", c_value)
    if m is None:
        debug("c= value is not in format method/method (%s)" % c_value)
        return False
    can_headers = m.group(1)
    if m.group(2) is not None:
        can_body = m.group(2)
    else:
        can_body = "simple"

    if can_headers == "simple":
        canonicalize_headers = Simple
    elif can_headers == "relaxed":
        canonicalize_headers = Relaxed
    else:
        debug("Unknown header canonicalization (%s)" % can_headers)
        return False

    headers = canonicalize_headers.canonicalize_headers(headers)

    if can_body == "simple":
        body = Simple.canonicalize_body(body)
    elif can_body == "relaxed":
        body = Relaxed.canonicalize_body(body)
    else:
        debug("Unknown body canonicalization (%s)" % can_body)
        return False

    if sig['a'] == "rsa-sha1":
        hasher = hashlib.sha1
        hashid = HASHID_SHA1
    elif sig['a'] == "rsa-sha256":
        hasher = hashlib.sha256
        hashid = HASHID_SHA256
    else:
        debug("Unknown signature algorithm (%s)" % sig['a'])
        return False

    if 'l' in sig:
        body = body[:int(sig['l'])]

    # The character checks above do not catch bad padding, so decoding can
    # still fail; treat that as an invalid signature.
    try:
        expected_bodyhash = base64.b64decode(re.sub(r"\s+", "", sig['bh']))
        signature = base64.b64decode(re.sub(r"\s+", "", sig['b']))
    except (TypeError, ValueError):
        debug("b= or bh= value cannot be base64-decoded")
        return False

    h = hasher()
    h.update(body)
    bodyhash = h.digest()
    debug("bh: %s" % base64.b64encode(bodyhash))
    if bodyhash != expected_bodyhash:
        debug("body hash mismatch (got %s, expected %s)" % (base64.b64encode(bodyhash), sig['bh']))
        return False

    s = dnstxt(sig['s']+"._domainkey."+sig['d']+".")
    if not s:
        return False
    pub = {}
    for f in re.split(r"\s*;\s*", s.strip()):
        if not f:
            # Key records commonly end in ";", which leaves an empty part.
            continue
        m = re.match(r"(\w+)\s*=\s*(.*)", f, re.DOTALL)
        if m is None:
            debug("invalid format in _domainkey txt record")
            return False
        pub[m.group(1)] = m.group(2)
    if not pub.get('p'):
        debug("no public key data (p=) in _domainkey txt record")
        return False
    x = asn1_parse(ASN1_Object, base64.b64decode(pub['p']))
    # The first content octet of a BIT STRING is the number of unused bits
    # in its last octet; skip it to reach the embedded RSA public key.
    pkd = asn1_parse(ASN1_RSAPublicKey, x[0][1][1:])
    pk = {
        'modulus': pkd[0][0],
        'publicExponent': pkd[0][1],
    }
    modlen = len(int2str(pk['modulus']))
    debug("modlen: %s" % modlen)

    include_headers = re.split(r"\s*:\s*", sig['h'])
    debug("include_headers: %s" % (include_headers,))
    sign_headers = []
    lastindex = {}
    for h in include_headers:
        # For every name in h=, take the bottom-most instance not yet used.
        # Header names are case-insensitive, so track positions by the
        # lowercased name.
        key = h.lower()
        i = lastindex.get(key, len(headers))
        while i > 0:
            i -= 1
            if key == headers[i][0].lower():
                sign_headers.append(headers[i])
                break
        lastindex[key] = i
    # The call to _remove() assumes that the signature b= only appears once in the signature header
    sign_headers += [(x[0], x[1].rstrip()) for x in canonicalize_headers.canonicalize_headers([(sigheaders[0][0], _remove(sigheaders[0][1], sig['b']))])]
    debug("verify headers: %s" % (sign_headers,))

    h = hasher()
    for x in sign_headers:
        h.update(x[0])
        h.update(":")
        h.update(x[1])
    d = h.digest()
    debug("verify digest: " + " ".join("%02x" % ord(x) for x in d))

    dinfo = asn1_build(
        (SEQUENCE, [
            (SEQUENCE, [
                (OBJECT_IDENTIFIER, hashid),
                (NULL, None),
            ]),
            (OCTET_STRING, d),
        ])
    )
    debug("dinfo: " + " ".join("%02x" % ord(x) for x in dinfo))
    if len(dinfo)+3 > modlen:
        debug("Hash too large for modulus")
        return False
    # Expected result of the public key operation: 00 01 FF..FF 00 <dinfo>.
    sig2 = "\x00\x01"+"\xff"*(modlen-len(dinfo)-3)+"\x00"+dinfo
    debug("sig2: " + " ".join("%02x" % ord(x) for x in sig2))
    debug(sig['b'])
    debug(re.sub(r"\s+", "", sig['b']))
    v = int2str(pow(str2int(signature), pk['publicExponent'], pk['modulus']), modlen)
    debug("v: " + " ".join("%02x" % ord(x) for x in v))
    # Compare the recovered block with the expected one.
    return "".join(v) == sig2
if __name__ == "__main__":
    # Manual smoke test: parse, sign and verify a small message using the
    # author's private key file.
    message = """From: greg@hewgill.com\r\nSubject: test\r\n message\r\n\r\nHi.\r\n\r\nWe lost the game. Are you hungry yet?\r\n\r\nJoe.\r\n"""
    print(rfc822_parse(message))
    keyfile = open("/home/greg/.domainkeys/rsa.private")
    try:
        privkey = keyfile.read()
    finally:
        # Close the key file even if reading fails.
        keyfile.close()
    sig = sign(message, "greg", "hewgill.com", privkey)
    print(sig)
    print(verify(sig+message))
    #print sign(open("/home/greg/tmp/message").read(), "greg", "hewgill.com", open("/home/greg/.domainkeys/rsa.private").read())