Стефан обнови решението на 23.04.2014 13:33 (преди над 10 години)
+import re
+
+
class PrivacyFilter:
    """Mask e-mail addresses and phone numbers found in a piece of text.

    Behaviour is controlled by three boolean attributes, all ``False`` by
    default:

    * ``preserve_phone_country_code`` -- keep the international prefix and
      country code of phone numbers and mask only the rest.
    * ``preserve_email_hostname`` -- keep the ``@hostname`` part of e-mail
      addresses and mask only the username.
    * ``partially_preserve_email_username`` -- like
      ``preserve_email_hostname``, but usernames longer than six characters
      also keep their first three characters.  Takes precedence over
      ``preserve_email_hostname``.
    """

    # Username: a letter followed by up to 200 word characters, '+', '.'
    # or '-'.
    _USERNAME = r'[a-zA-Z][\w+.-]{0,200}'
    # Hostname: one or more dot-terminated labels followed by a TLD of 2-5
    # letters.  A label is 1-63 characters long and neither starts nor ends
    # with a hyphen.
    _HOSTNAME = r'(?:\w(?:[\w-]{0,61}\w)?\.)+[a-zA-Z]{2,3}(?:[a-zA-Z]{2})?'
    _EMAIL = re.compile(
        '(?P<username>' + _USERNAME + ')@(?P<hostname>' + _HOSTNAME + ')'
    )
    # Phone: "00", "0" or "+" followed by 6-11 digits, each digit optionally
    # preceded by up to two separators (parentheses, whitespace, hyphen).
    _PHONE = re.compile(r'(?:00|0|\+)(?:[()\s-]{0,2}[0-9]){6,11}')

    def __init__(self, text):
        """Store *text* and switch every preservation option off."""
        self.text = text
        self.preserve_phone_country_code = False
        self.preserve_email_hostname = False
        self.partially_preserve_email_username = False

    def filtered(self):
        """Return the text with e-mails masked first, then phone numbers."""
        return self.phone_replaced(self.mail_replaced())

    def mail_replaced(self):
        """Return ``self.text`` with every e-mail address masked.

        Each address is rewritten in a single pass from its own match, so an
        address is never re-interpreted as a regular expression and one
        address can never affect how another one is masked.
        """
        if self.partially_preserve_email_username:
            return self._EMAIL.sub(self._partially_masked, self.text)
        if self.preserve_email_hostname:
            return self._EMAIL.sub(self._username_masked, self.text)
        return self._EMAIL.sub('[EMAIL]', self.text)

    def phone_replaced(self, text):
        """Return *text* with every phone number masked.

        With ``preserve_phone_country_code`` set, the replacement of each
        match is delegated to the module-level ``iterate_phones`` helper;
        otherwise every match becomes ``[PHONE]``.
        """
        if self.preserve_phone_country_code:
            return iterate_phones(self._PHONE.finditer(text), text)
        return self._PHONE.sub('[PHONE]', text)

    @staticmethod
    def _username_masked(match):
        """Replace the whole username of a matched e-mail address."""
        return '[FILTERED]@' + match.group('hostname')

    @staticmethod
    def _partially_masked(match):
        """Keep the first three characters of usernames longer than six."""
        username = match.group('username')
        kept = username[:3] if len(username) > 6 else ''
        return kept + '[FILTERED]@' + match.group('hostname')
+
+
# An international number starts with an exit prefix ("00" or "+") that is
# immediately followed by a country code of one to three digits, the first of
# which is not zero.
_COUNTRY_CODE = re.compile(r'(?P<prefix>00|\+)(?P<code>[1-9][0-9]{0,2})')


def iterate_phones(phones, result):
    """Return *result* with each matched phone number masked.

    *phones* is an iterable of match objects found in *result*, in ascending,
    non-overlapping order (as produced by ``re.finditer``).

    A number that starts with an international prefix and a country code is
    replaced by ``<prefix><country code> [FILTERED]``.  Every other number --
    a local one, or one where no country code directly follows the prefix --
    is replaced by ``[PHONE]``.
    """
    pieces = []
    cursor = 0  # end of the previous match within the original text
    for phone in phones:
        pieces.append(result[cursor:phone.start()])
        # Anchored at the start of the number so that digits from the middle
        # of it can never be mistaken for the country code.
        international = _COUNTRY_CODE.match(phone.group())
        if international is None:
            pieces.append('[PHONE]')
        else:
            pieces.append(international.group() + ' [FILTERED]')
        cursor = phone.end()
    pieces.append(result[cursor:])
    return ''.join(pieces)
+
+
class Validations:
    """Predicates that check whether a whole string has a given format.

    Every check succeeds only when the complete string matches; a valid
    prefix followed by anything else is rejected.
    """

    # TLD: two to five letters.
    _TLD = r'[a-zA-Z]{2,3}(?:[a-zA-Z]{2})?'
    # One or more dot-terminated labels followed by a TLD.  A label is 1-63
    # characters long and neither starts nor ends with a hyphen.
    _HOSTNAME = r'(?:\w(?:[\w-]{0,61}\w)?\.)+' + _TLD
    # A letter followed by up to 200 word characters, '+', '.' or '-',
    # then '@' and a hostname.
    _EMAIL = r'[a-zA-Z][\w+.-]{0,200}@' + _HOSTNAME
    # "00", "0" or "+", then 6-11 digits of which the first is not zero; each
    # digit may be preceded by up to two separators.
    _PHONE = (
        r'(?:00|0|\+)'
        r'(?:[()\s-]{0,2}[1-9])'
        r'(?:[()\s-]{0,2}[0-9]){5,10}'
    )
    # 0-255, leading zeros allowed (e.g. "007").
    _OCTET = r'(?:25[0-5]|2[0-4]\d|[01]?\d?\d)'
    _IP_ADDRESS = _OCTET + r'(?:\.' + _OCTET + r'){3}'
    # Optional minus, then either a lone zero or digits without a leading
    # zero.
    _INTEGER = r'-?(?:0|(?!0)\d+)'
    # An integer optionally followed by '.' or ',' and at least one digit.
    _NUMBER = _INTEGER + r'(?:[.,]\d+)?'
    # YYYY-MM-DD with month 01-12 and day 01-31 (no per-month day check).
    _DATE = r'[0-9]{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])'
    # HH:MM:SS with hours 00-23 and minutes/seconds 00-59.
    _TIME = r'(?:[0-1]\d|2[0-3]):[0-5][0-9]:[0-5][0-9]'

    @classmethod
    def _matches(cls, pattern, text):
        """Return True when *pattern* matches the whole of *text*.

        ``re.fullmatch`` lets the engine backtrack until the entire string is
        consumed, so a shorter alternative that happens to match first can
        not cause a valid value to be rejected.
        """
        return re.fullmatch(pattern, text) is not None

    @classmethod
    def is_email(cls, text):
        """Return True when *text* is an e-mail address."""
        return cls._matches(cls._EMAIL, text)

    @classmethod
    def is_phone(cls, text):
        """Return True when *text* is a phone number."""
        return cls._matches(cls._PHONE, text)

    @classmethod
    def is_hostname(cls, text):
        """Return True when *text* is a hostname ending in a TLD."""
        return cls._matches(cls._HOSTNAME, text)

    @classmethod
    def is_ip_address(cls, text):
        """Return True when *text* is four dot-separated numbers in 0-255."""
        return cls._matches(cls._IP_ADDRESS, text)

    @classmethod
    def is_integer(cls, text):
        """Return True when *text* is an integer without leading zeros."""
        return cls._matches(cls._INTEGER, text)

    @classmethod
    def is_number(cls, text):
        """Return True when *text* is an integer or a decimal number."""
        return cls._matches(cls._NUMBER, text)

    @classmethod
    def is_date(cls, text):
        """Return True when *text* is a date formatted as ``YYYY-MM-DD``."""
        return cls._matches(cls._DATE, text)

    @classmethod
    def is_time(cls, text):
        """Return True when *text* is a time formatted as ``HH:MM:SS``."""
        return cls._matches(cls._TIME, text)

    @classmethod
    def is_datetime(cls, text):
        """Return True when *text* is a date and a time joined by ' ' or 'T'."""
        # A date is exactly 10 characters and a time 8, plus the separator.
        if len(text) < 19 or text[10] not in ' T':
            return False
        return cls.is_date(text[:10]) and cls.is_time(text[11:])