Димитър обнови решението на 20.04.2014 12:40 (преди над 10 години)
+import re
+
# Regular expressions used to locate personal data inside free text.
#
# Every pattern is a single raw string. Capturing groups are kept where the
# previous versions of these patterns had them, so group numbering is stable.

# Up to two separators (space, hyphen, parentheses) may follow a digit.
_PHONE_SEPARATORS = r'[ \-\(\)]{0,2}'

# Main part of a phone number: 5 to 10 digits, each optionally followed by
# separators. The last digit is matched on its own so that a match never
# swallows the whitespace/punctuation that follows the number, and the
# look-ahead keeps a match from stopping in the middle of a longer digit run.
_PHONE_DIGITS = r'([0-9]' + _PHONE_SEPARATORS + r'){4,9}[0-9](?![0-9])'

# Local format: a leading 0, a non-zero digit, then the main part.
# The look-behind keeps a match from starting inside a longer digit run.
LOCAL_PHONE_PATTERN = r'(?<![0-9])0[1-9]' + _PHONE_DIGITS

# International format: "00" or "+", a 1-3 digit country code that does not
# start with 0, optional separators, then the main part.
INTERNATIONAL_PHONE_PATTERN = (
    r'(?<![0-9])(00|\+)[1-9][0-9]{0,2}' + _PHONE_SEPARATORS + _PHONE_DIGITS
)

# Either phone format (local is tried first at each position).
PHONE_PATTERN = (
    '(' + LOCAL_PHONE_PATTERN + ')|(' + INTERNATIONAL_PHONE_PATTERN + ')'
)

# One host label: 1 to 63 letters, digits or hyphens, with no hyphen at
# either end.
_HOST_LABEL = r'[a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?'

# E-mail address: a user name of up to 201 characters starting with a letter
# or digit, "@", one or more host labels, and a 2-3 letter top-level domain.
EMAIL_PATTERN = (
    r'[a-zA-Z0-9][a-zA-Z0-9\-\+\._]{0,200}@'
    + _HOST_LABEL + r'(\.' + _HOST_LABEL + r')*\.[a-zA-Z]{2,3}'
)
+
+
class PrivacyFilter:
    """Mask e-mail addresses and phone numbers found in a piece of text.

    The caller constructs the filter with the text, optionally switches on
    any of the three ``preserve_*`` flags (all start as False), and then
    calls :meth:`filtered`.
    """

    def __init__(self, text):
        self.text = text
        # Keep the international prefix of a phone ("+359 [FILTERED]").
        self.preserve_phone_country_code = False
        # Keep the "@host" part of an e-mail ("[FILTERED]@example.com").
        self.preserve_email_hostname = False
        # Keep the host and, for user names of 6+ characters, the first
        # three characters of the user name ("joh[FILTERED]@example.com").
        self.partially_preserve_email_username = False

    def filtered(self):
        """Return a copy of ``self.text`` with personal data masked.

        E-mail addresses are masked first and phone numbers second, so the
        digits inside an address are never treated as a phone number.
        ``self.text`` itself is not modified.
        """
        without_emails = re.sub(EMAIL_PATTERN, self._masked_email, self.text)
        return self._filter_phones(without_emails)

    def _masked_email(self, match):
        """Return the replacement for one matched e-mail address."""
        username, _, hostname = match.group(0).partition('@')
        # Partial user-name preservation always keeps the host name, so it
        # takes precedence when both e-mail flags are switched on.
        if self.partially_preserve_email_username:
            visible = username[:3] if len(username) >= 6 else ''
            return visible + '[FILTERED]@' + hostname
        if self.preserve_email_hostname:
            return '[FILTERED]@' + hostname
        return '[EMAIL]'

    def _filter_phones(self, text):
        """Return *text* with every phone number masked."""
        if not self.preserve_phone_country_code:
            return re.sub(PHONE_PATTERN, '[PHONE]', text)
        # International numbers keep their prefix; local numbers have no
        # country code to keep, so they are masked completely.
        text = re.sub(
            INTERNATIONAL_PHONE_PATTERN,
            self._masked_international_phone,
            text,
        )
        return re.sub(LOCAL_PHONE_PATTERN, '[PHONE]', text)

    @staticmethod
    def _masked_international_phone(match):
        """Return "<prefix and country code> [FILTERED]" for one match."""
        # Re-read the prefix greedily from the matched text; the matched
        # text always begins with "00" or "+" followed by a non-zero digit.
        prefix = re.match(r'(00|\+)[1-9][0-9]{0,2}', match.group(0))
        return prefix.group(0) + ' [FILTERED]'
+
def matcher(regex, string):
    """Return the first substring of *string* that matches *regex*.

    The search is done with ``re.search``, so the match may start anywhere
    in *string*.

    When nothing matches, *string* itself is returned unchanged. Callers
    therefore cannot tell "no match" from "the whole string matched" by the
    return value alone and have to validate the result themselves.
    """
    match = re.search(regex, string)
    if match is None:
        return string
    return match.group(0)
+
+
class Validations:
    """Stateless validators for common textual formats.

    Every ``is_*`` method returns True only when the *whole* value has the
    expected format (a trailing newline is not tolerated) and returns False
    for anything else, including values that are not strings.
    """

    # Building blocks shared by the validators below.
    _USERNAME = r'[a-zA-Z0-9][a-zA-Z0-9\-\+\._]{0,200}'
    # One host label: 1 to 63 letters, digits or hyphens, with no hyphen
    # at either end.
    _HOST_LABEL = r'[a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?'
    _HOSTNAME = _HOST_LABEL + r'(?:\.' + _HOST_LABEL + r')*\.[a-zA-Z]{2,3}'
    _EMAIL = _USERNAME + '@' + _HOSTNAME

    # 5 to 10 digits, each optionally followed by up to two separators
    # (space, hyphen, parentheses).
    _PHONE_BODY = r'(?:[0-9][ \-\(\)]{0,2}){5,10}'
    _LOCAL_PHONE = r'0[1-9]' + _PHONE_BODY
    _INTERNATIONAL_PHONE = (
        r'(?:00|\+)[1-9][0-9]{0,2}[ \-\(\)]{0,2}' + _PHONE_BODY
    )

    _IP_ADDRESS = r'(?:[0-9]{1,3}\.){3}[0-9]{1,3}'
    _INTEGER = r'-?(?:0|[1-9][0-9]*)'
    # NOTE(review): a trailing dot ("1.", "0.") is still accepted, exactly
    # as before; tighten the fraction to [0-9]+ if that is not wanted.
    _NUMBER = _INTEGER + r'(?:\.[0-9]*)?'
    _DATE = r'[1-9][0-9]{0,3}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12][0-9]|3[01])'
    _TIME = r'(?:[01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9]'

    @staticmethod
    def _matches(pattern, value):
        """Return True when *value* is a string fully matched by *pattern*."""
        return isinstance(value, str) and re.fullmatch(pattern, value) is not None

    @staticmethod
    def is_email(value):
        """Return True for ``username@hostname`` with exactly one "@"."""
        return Validations._matches(Validations._EMAIL, value)

    @staticmethod
    def is_phone(value):
        """Return True for a phone number in local or international format."""
        return (
            Validations._matches(Validations._LOCAL_PHONE, value)
            or Validations._matches(Validations._INTERNATIONAL_PHONE, value)
        )

    @staticmethod
    def is_hostname(value):
        """Return True for dot-separated labels ending in a 2-3 letter TLD."""
        return Validations._matches(Validations._HOSTNAME, value)

    @staticmethod
    def is_ip_address(value):
        """Return True for four dot-separated numbers, each in 0..255."""
        if not Validations._matches(Validations._IP_ADDRESS, value):
            return False
        # The pattern only checks the shape; the range is checked here.
        return all(int(octet) <= 255 for octet in value.split('.'))

    @staticmethod
    def is_number(value):
        """Return True for an optionally negative integer or decimal."""
        return Validations._matches(Validations._NUMBER, value)

    @staticmethod
    def is_integer(value):
        """Return True for an optionally negative integer, no leading zeros."""
        return Validations._matches(Validations._INTEGER, value)

    @staticmethod
    def is_date(value):
        """Return True for ``Y-MM-DD`` with a 1-4 digit year not starting with 0.

        The month must be 01-12 and the day 01-31; the day is not checked
        against the length of the particular month.
        """
        return Validations._matches(Validations._DATE, value)

    @staticmethod
    def is_time(value):
        """Return True for a 24-hour ``HH:MM:SS`` time."""
        return Validations._matches(Validations._TIME, value)

    @staticmethod
    def is_datetime(value):
        """Return True for a date and a time separated by spaces or "T"."""
        if not isinstance(value, str):
            return False
        # Repeated separators produce empty pieces, which are ignored;
        # anything besides exactly one date and one time is rejected.
        parts = [part for part in re.split(' |T', value) if part]
        if len(parts) != 2:
            return False
        return Validations.is_date(parts[0]) and Validations.is_time(parts[1])
- Регулярните ти изрази са безумно дълги. Погледни `re.VERBOSE`.
- Освен това някои от тях не са raw string-ове.
- Използвай `\b` за разни неща.
- Опрости кода на метода `filtered`.
- Този израз:

      if re.match("^...$", value) != None:
          return True
      return False

  Не ти ли напомня на този известен нон-сенс?

      if True:
          return True
      else:
          return False