Mail charset error support
- Try multiple charsets when no charset is specified by the "Content-Type" field: start with sys.defaultencoding (utf8), then "iso-8859-1", then the encoding from the config file, and finally the encoding found in the preceding "From" or "Subject" fields. darcs-hash:20060201110843-86b55-7fd556a8ffadd9a5fed2b3317b17bfcca9d1dc58.gz
This commit is contained in:
@@ -37,6 +37,8 @@ POP3_TIMEOUT = 10
|
||||
DO_NOTHING = 0
|
||||
DIGEST = 1
|
||||
RETRIEVE = 2
|
||||
default_encoding = "iso-8859-1"
|
||||
|
||||
## All MY* classes are implemented to add a timeout (settimeout)
|
||||
## while connecting
|
||||
class MYIMAP4(imaplib.IMAP4):
|
||||
@@ -192,41 +194,82 @@ class MailConnection(object):
|
||||
+ str(self.online_action) + "#" + str(self.away_action) + "#" + \
|
||||
str(self.xa_action) + "#" + str(self.dnd_action) + "#" + str(self.offline_action) + "#" + str(self.interval) + "#" + str(self.live_email_only)
|
||||
|
||||
def get_decoded_part(self, part):
|
||||
def get_decoded_part(self, part, charset_hint):
|
||||
content_charset = part.get_content_charset()
|
||||
if content_charset:
|
||||
return part.get_payload(decode=True).decode(content_charset)
|
||||
return unicode(part.get_payload(decode=True).decode(content_charset))
|
||||
else:
|
||||
return part.get_payload(decode=True)
|
||||
result = ""
|
||||
try:
|
||||
result = unicode(part.get_payload(decode=True))
|
||||
except Exception, e:
|
||||
try:
|
||||
result = unicode(part.get_payload(decode=True).decode("iso-8859-1"))
|
||||
except Exception, e:
|
||||
try:
|
||||
result = unicode(part.get_payload(decode=True).decode(default_encoding))
|
||||
except Exception, e:
|
||||
if charset_hint is not None:
|
||||
try:
|
||||
result = unicode(part.get_payload(decode=True).decode(charset_hint))
|
||||
except Exception, e:
|
||||
print e
|
||||
return result
|
||||
|
||||
def format_message(self, email_msg, include_body = True):
|
||||
from_decoded = email.Header.decode_header(email_msg["From"])
|
||||
charset_hint = None
|
||||
result = u"From : "
|
||||
for i in range(len(from_decoded)):
|
||||
if from_decoded[i][1]:
|
||||
charset_hint = from_decoded[i][1]
|
||||
result += unicode(from_decoded[i][0].decode(from_decoded[i][1]))
|
||||
else:
|
||||
try:
|
||||
result += unicode(from_decoded[i][0])
|
||||
except Exception,e:
|
||||
try:
|
||||
result += unicode(from_decoded[i][0].decode("iso-8859-1"))
|
||||
except Exception, e:
|
||||
try:
|
||||
result += unicode(from_decoded[i][0].decode(default_encoding))
|
||||
except Exception, e:
|
||||
print e
|
||||
result += "\n"
|
||||
|
||||
subject_decoded = email.Header.decode_header(email_msg["Subject"])
|
||||
result += u"Subject : "
|
||||
for i in range(len(subject_decoded)):
|
||||
if subject_decoded[i][1]:
|
||||
charset_hint = subject_decoded[i][1]
|
||||
result += unicode(subject_decoded[i][0].decode(subject_decoded[i][1]))
|
||||
else:
|
||||
try:
|
||||
result += unicode(subject_decoded[i][0])
|
||||
except Exception,e:
|
||||
try:
|
||||
result += unicode(subject_decoded[i][0].decode("iso-8859-1"))
|
||||
except Exception, e:
|
||||
try:
|
||||
result += unicode(subject_decoded[i][0].decode(default_encoding))
|
||||
except Exception, e:
|
||||
if charset_hint is not None:
|
||||
try:
|
||||
result += unicode(subject_decoded[i][0].decode(charset_hint))
|
||||
except Exception, e:
|
||||
print e
|
||||
|
||||
result += u"\n\n"
|
||||
|
||||
if include_body:
|
||||
action = {
|
||||
"text/plain" : lambda part: self.get_decoded_part(part),
|
||||
"text/plain" : lambda part: self.get_decoded_part(part, charset_hint),
|
||||
"text/html" : lambda part: "\n<<<HTML part skipped>>>\n"
|
||||
}
|
||||
for part in email_msg.walk():
|
||||
content_type = part.get_content_type()
|
||||
if action.has_key(content_type):
|
||||
result += unicode(action[content_type](part)) + u'\n'
|
||||
result += action[content_type](part) + u'\n'
|
||||
return result
|
||||
|
||||
def format_message_summary(self, email_msg):
|
||||
|
||||
2
jmc.py
2
jmc.py
@@ -26,6 +26,7 @@ import sys
|
||||
import os.path
|
||||
import logging
|
||||
|
||||
from jabber import mailconnection
|
||||
from jabber.component import MailComponent, ComponentFatalError
|
||||
from jabber.config import Config
|
||||
|
||||
@@ -46,6 +47,7 @@ def main(config_file = "jmc.xml", isDebug = 0):
|
||||
str(sys.exc_value)
|
||||
sys.exit(1)
|
||||
|
||||
mailconnection.default_encoding = config.get_content("config/mail_default_encoding")
|
||||
print "creating component..."
|
||||
mailcomp = MailComponent(config)
|
||||
|
||||
|
||||
1
jmc.xml
1
jmc.xml
@@ -16,4 +16,5 @@
|
||||
<spooldir>/var/spool/jabber</spooldir>
|
||||
<!-- default check interval in minutes -->
|
||||
<check_interval>5</check_interval>
|
||||
<mail_default_encoding>iso-8859-1</mail_default_encoding>
|
||||
</config>
|
||||
|
||||
@@ -70,9 +70,9 @@ if __name__ == '__main__':
|
||||
storage_suite, \
|
||||
dbmstorage_suite, \
|
||||
sqlitestorage_suite))
|
||||
# test_support.run_suite(mail_connection_suite)
|
||||
# test_support.run_suite(pop3_connection_suite)
|
||||
# test_support.run_suite(imap_connection_suite)
|
||||
#test_support.run_suite(mail_connection_suite)
|
||||
#test_support.run_suite(pop3_connection_suite)
|
||||
#test_support.run_suite(imap_connection_suite)
|
||||
#test_support.run_suite(mc_factory_suite)
|
||||
#test_support.run_suite(component_suite)
|
||||
#test_support.run_suite(component2_suite)
|
||||
|
||||
@@ -32,6 +32,9 @@ def _create_multipart(encoded):
|
||||
part2 = MIMEText("Encoded multipart2 with 'iso-8859-15' charset (éàê)", \
|
||||
_charset = "iso-8859-15")
|
||||
msg.attach(part2)
|
||||
part3 = MIMEText("Encoded multipart3 with no charset (éàê)", \
|
||||
_charset = "")
|
||||
msg.attach(part3)
|
||||
else:
|
||||
part1 = MIMEText("Not encoded multipart1")
|
||||
msg.attach(part1)
|
||||
|
||||
@@ -15,4 +15,5 @@
|
||||
<storage>SQLite</storage>
|
||||
<spooldir>.</spooldir>
|
||||
<check_interval>5</check_interval> <!-- in minutes -->
|
||||
<mail_default_encoding>iso-8859-15</mail_default_encoding>
|
||||
</config>
|
||||
|
||||
@@ -48,12 +48,12 @@ class MailConnection_TestCase(unittest.TestCase):
|
||||
|
||||
test_get_decoded_part_not_encoded = \
|
||||
make_test((False, False, False), \
|
||||
lambda self, email: self.connection.get_decoded_part(email), \
|
||||
lambda self, email: self.connection.get_decoded_part(email, None), \
|
||||
u"Not encoded single part")
|
||||
|
||||
test_get_decoded_part_encoded = \
|
||||
make_test((True, False, False), \
|
||||
lambda self, email: self.connection.get_decoded_part(email), \
|
||||
lambda self, email: self.connection.get_decoded_part(email, None), \
|
||||
u"Encoded single part with 'iso-8859-15' charset (éàê)")
|
||||
|
||||
test_format_message_summary_not_encoded = \
|
||||
@@ -104,7 +104,8 @@ class MailConnection_TestCase(unittest.TestCase):
|
||||
lambda self, email: self.connection.format_message(email), \
|
||||
u"From : encoded from (éàê)\nSubject : encoded subject (éà" + \
|
||||
u"ê)\n\nutf-8 multipart1 with no charset (éàê)" + \
|
||||
u"\nEncoded multipart2 with 'iso-8859-15' charset (éàê)\n")
|
||||
u"\nEncoded multipart2 with 'iso-8859-15' charset (éàê)\n" + \
|
||||
u"Encoded multipart3 with no charset (éàê)\n")
|
||||
|
||||
|
||||
class POP3Connection_TestCase(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user