Ирина обнови решението на 23.04.2014 13:20 (преди над 10 години)
+import re
+
+
class PrivacyFilter:
    """Mask e-mail addresses and phone numbers found in a piece of text.

    Behaviour is tuned through three boolean attributes, all ``False`` by
    default:

    * ``preserve_phone_country_code`` -- keep the international prefix
      (``+`` or ``00`` followed by up to three digits) of a phone number.
    * ``preserve_email_hostname`` -- keep the ``@hostname`` part of an
      e-mail address.
    * ``partially_preserve_email_username`` -- keep the hostname and, for
      usernames of six or more characters, the first three characters of
      the username.
    """

    # Candidates are deliberately loose; each one is confirmed with
    # ``Validations`` before it is masked.  The e-mail candidate is limited
    # to the characters the validator accepts so that adjacent punctuation
    # (e.g. "x!someone@example.com") is not swallowed into the address.
    _EMAIL_CANDIDATE = re.compile(r'\b[\w+.-]+@[\w.-]+\b')
    _PHONE_CANDIDATE = re.compile(r'\+?\d[\d\(\)\s-]+\d\b')
    _COUNTRY_CODE = re.compile(r'(?:00|\+)[0-9]{1,3}')

    def __init__(self, text):
        """Store ``text`` and switch every preservation option off."""
        self.text = text
        self.preserve_phone_country_code = False
        self.preserve_email_hostname = False
        self.partially_preserve_email_username = False

    def filtered(self):
        """Return a copy of ``self.text`` with e-mails and phones masked.

        E-mail addresses are masked first, phone numbers second.
        ``self.text`` itself is never modified.
        """
        without_emails = self._EMAIL_CANDIDATE.sub(self._mask_email,
                                                   self.text)
        return self._PHONE_CANDIDATE.sub(self._mask_phone, without_emails)

    def _mask_email(self, match):
        """Return the replacement for one e-mail candidate match."""
        email = match.group()
        if not Validations.is_email(email):
            return email  # not an address after all: leave it untouched
        username, _, hostname = email.partition('@')
        if self.partially_preserve_email_username:
            # Short usernames would be revealed almost entirely by a
            # three-character prefix, so they are masked completely.
            visible = username[:3] if len(username) >= 6 else ''
            return visible + '[FILTERED]@' + hostname
        if self.preserve_email_hostname:
            return '[FILTERED]@' + hostname
        return '[EMAIL]'

    def _mask_phone(self, match):
        """Return the replacement for one phone candidate match."""
        phone = match.group()
        if not Validations.is_phone(phone):
            return phone  # not a phone number: leave it untouched
        if not self.preserve_phone_country_code:
            return '[PHONE]'
        country_code = self._COUNTRY_CODE.match(phone)
        # NOTE(review): a local number (no "+"/"00" prefix) becomes
        # " [FILTERED]" with a leading space, exactly as before this
        # rewrite -- confirm whether that output is intended.
        prefix = country_code.group() if country_code else ''
        return prefix + ' [FILTERED]'


class Validations:
    """String validators; every method returns ``True`` or ``False``.

    All checks must match the *entire* argument (``re.fullmatch``), so a
    trailing newline or any other extra text makes the value invalid.
    """

    # A label is 1-63 characters, starts and ends with a word character and
    # may contain hyphens in between; the name ends with a 2-3 letter TLD
    # optionally followed by a two-letter suffix (e.g. "co.uk").
    _HOSTNAME = re.compile(r'(?:\w(?:[\w-]{,61}\w)?\.)+'
                           r'[a-zA-Z]{2,3}(?:\.[a-zA-Z]{2})?')
    _EMAIL_USERNAME = re.compile(r'\w[\w+.-]{,200}')
    _PHONE = re.compile(r'(?:0|00\d{1,3}|\+[1-9]\d{,2})'
                        r'(?:[\s()-]{,2}\d){5,10}[0-9]')
    # 0-255 without leading zeros.
    _IP_OCTET = re.compile(r'0|[1-9][0-9]?|1[0-9]{2}|2[0-4][0-9]|25[0-5]')
    _NUMBER = re.compile(r'-?(?:0|[1-9]\d*)(?:\.\d+)?')
    _DATE = re.compile(r'[0-9]{4}-(?:0[1-9]|1[0-2])-'
                       r'(?:0[1-9]|[12][0-9]|3[01])')
    _TIME = re.compile(r'(?:[01][0-9]|2[0-3])(?::[0-5][0-9]){2}')

    @classmethod
    def is_hostname(cls, hostname):
        """Tell whether ``hostname`` is dot-separated labels plus a TLD."""
        return bool(cls._HOSTNAME.fullmatch(hostname))

    @classmethod
    def is_email(cls, email):
        """Tell whether ``email`` is ``username@hostname``.

        The username starts with a word character followed by at most 200
        word characters, ``+``, ``.`` or ``-``; the hostname must satisfy
        :meth:`is_hostname`.
        """
        parts = email.split('@')
        if len(parts) != 2:
            return False
        username, hostname = parts
        return (bool(cls._EMAIL_USERNAME.fullmatch(username)) and
                cls.is_hostname(hostname))

    @classmethod
    def is_phone(cls, phone):
        """Tell whether ``phone`` is a local or international number.

        A number is a prefix (``0``, ``00`` plus 1-3 digits, or ``+`` plus
        1-3 digits not starting with zero) followed by 6-11 digits, each of
        which -- except the last -- may be preceded by up to two
        separators (whitespace, parentheses or hyphens).
        """
        return bool(cls._PHONE.fullmatch(phone))

    @classmethod
    def is_ip_address(cls, ip_address):
        """Tell whether ``ip_address`` is four dot-separated 0-255 octets."""
        octets = ip_address.split('.')
        return (len(octets) == 4 and
                all(cls._IP_OCTET.fullmatch(octet) for octet in octets))

    @classmethod
    def is_number(cls, number):
        """Tell whether ``number`` is a decimal integer or fraction.

        An optional ``-`` sign is allowed; leading zeros, a bare ``.5`` and
        a trailing ``1.`` are not.
        """
        return bool(cls._NUMBER.fullmatch(number))

    @classmethod
    def is_integer(cls, integer):
        """Tell whether ``integer`` is a number without a fractional part."""
        return cls.is_number(integer) and '.' not in integer

    @classmethod
    def is_date(cls, date):
        """Tell whether ``date`` looks like ``YYYY-MM-DD``.

        The month must be 01-12 and the day 01-31; the day is not checked
        against the length of the particular month.
        """
        return bool(cls._DATE.fullmatch(date))

    @classmethod
    def is_time(cls, time):
        """Tell whether ``time`` looks like ``HH:MM:SS`` on a 24-hour clock."""
        return bool(cls._TIME.fullmatch(time))

    @classmethod
    def is_datetime(cls, datetime):
        """Tell whether ``datetime`` is a date and a time joined by ' ' or 'T'.

        A space takes precedence over ``T`` as the separator, and the value
        must consist of exactly two parts.
        """
        for separator in (' ', 'T'):
            if separator in datetime:
                parts = datetime.split(separator)
                return (len(parts) == 2 and
                        cls.is_date(parts[0]) and
                        cls.is_time(parts[1]))
        return False