Решение на Регулярни изрази от Стефан Владков

Обратно към всички решения

Към профила на Стефан Владков

Резултати

  • 7 точки от тестове
  • 0 бонус точки
  • 7 точки общо
  • 28 успешни тест(а)
  • 11 неуспешни тест(а)

Код

import re
class PrivacyFilter:
def __init__(self, text):
self.text = text
self.preserve_phone_country_code = False
self.preserve_email_hostname = False
self.partially_preserve_email_username = False
def filtered(self):
filtered_text = self.mail_replaced()
filtered_text = self.phone_replaced(filtered_text)
return filtered_text
def mail_replaced(self):
tld = '[a-zA-Z]{2,3}(?:[a-zA-Z]{2})?'
hostname = '@(?:\w[\w-]{,62}[^-]\.)*'
username = '[a-zA-Z][\w_+\.-]{,200}'
if self.partially_preserve_email_username:
emails = re.findall(username + hostname + tld, self.text)
result = self.text
for email in emails:
if len(re.findall(username, email)[0]) > 6:
next = '(?=' + str(email)[3:] + ')'
result = re.sub(next + username, '[FILTERED]', result)
else:
next = '(?=' + str(email) + ')'
result = re.sub(next + username, '[FILTERED]', result)
return result
elif self.preserve_email_hostname:
emails = re.findall(username + hostname + tld, self.text)
result = self.text
for email in emails:
next = '(?=' + str(email) + ')'
result = re.sub(next + username, '[FILTERED]', result)
return result
else:
return re.sub(username + hostname + tld, '[EMAIL]', self.text)
def phone_replaced(self, text):
prefix = '(?:00|0|\+)'
main_part = '(?:[\(\)\s-]{0,2}[0-9]){6,11}'
if self.preserve_phone_country_code:
phones = re.finditer(prefix + main_part, text)
result = text
return iterate_phones(phones, result)
else:
return re.sub(prefix + main_part, '[PHONE]', text)
def iterate_phones(phones, result):
difference = 0
prefix = '(?:00|0|\+)'
main_part = '(?:[\(\)\s-]{0,2}[0-9]){6,11}'
for phone in phones:
current_prefix = re.match(prefix, phone.group()).group()
if current_prefix != '0':
code_expression = '(?!00|0|\+)[1-9][0-9]{1,2}'
country_code = re.search(code_expression, phone.group())
before_number = result[:(phone.start() - difference)]
after_number = result[(phone.end() - difference):]
filtered = current_prefix + country_code.group() + ' [FILTERED]'
difference += len(phone.group()) - len(filtered)
result = before_number + filtered + after_number
#print(result)
else:
before_number = result[:(phone.start() - difference)]
after_number = result[(phone.end() - difference):]
filtered = re.sub(prefix + main_part, '[PHONE]', phone.group())
difference += len(phone.group()) - len(filtered)
result = before_number + filtered + after_number
return result
class Validations():
@classmethod
def is_email(cls, text):
tld = '[a-zA-Z]{2,3}(?:[a-zA-Z]{2})?'
hostname = '@(?:\w[\w-]{0,62}[^-]\.)+'
username = '[a-zA-Z][\w_\+\.-]{,200}'
matched = re.match(username + hostname + tld, text)
if matched is not None:
return text == matched.group()
return False
@classmethod
def is_phone(cls, text):
prefix = '(?:00|0|\+)'
#code_expression = '(?!00|0|\+)[1-9][0-9]{1,2}.+'
main_part = '(?:[\(\)\s-]{0,2}[1-9])(?:[\(\)\s-]{0,2}[0-9]){5,10}'
matched = re.match(prefix + main_part, text)
if matched is not None:
return text == matched.group()
return False
@classmethod
def is_hostname(cls, text):
hostname = '(?:\w[\w-]{0,62}[^-]\.)+'
tld = '[a-zA-Z]{2,3}(?:[a-zA-Z]{2})?'
matched = re.match(hostname + tld, text)
if matched is not None:
return text == matched.group()
return False
@classmethod
def is_ip_address(cls, text):
less_than_255 = '(2?([0-4]\d|5[0-5])|[01]?\d?\d)'
ip_segment = '(' + less_than_255 + '\.)'
ip_expression = ip_segment + '{3}' + less_than_255
matched = re.match(ip_expression, text)
if matched is not None:
return text == matched.group()
return False
@classmethod
def is_integer(cls, text):
matched = re.match('-?(0|\d+)', text)
if matched is not None:
return text == matched.group()
return False
@classmethod
def is_number(cls, text):
matched = re.match('-?(0|\d+)([\.,]\d+)?', text)
if matched is not None:
return text == matched.group()
return False
@classmethod
def is_date(cls, text):
month = '(0[1-9]|1[0-2])'
less_than_31 = '([0-2]\d|3[0-1])'
matched = re.match('[0-9]{4}-' + month + '-' + less_than_31, text)
if matched is not None:
return text == matched.group()
return False
@classmethod
def is_time(cls, text):
hours = '([0-1]\d|2[0-3])'
minutes = '([0-5][0-9])'
seconds = minutes
matched = re.match(hours + ':' + minutes + ':' + seconds, text)
if matched is not None:
return text == matched.group()
return False
@classmethod
def is_datetime(cls, text):
if(len(text) < 19):
return False
date = text[:10]
time = text[11:]
if text[10] != ' ' and text[10] != 'T':
return False
return Validations.is_date(date) and Validations.is_time(time)

Лог от изпълнението

F..FFF.F.F.....FF..F...........FF......
======================================================================
FAIL: test_allows_email_hostname_to_be_preserved (test.PrivacyFilterTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-1q4zv2p/test.py", line 55, in test_allows_email_hostname_to_be_preserved
    self.assertEqual('[FILTERED]@exa.mple.com', self.filter_email_usernames('some12-+3@exa.mple.com'))
AssertionError: '[FILTERED]@exa.mple.com' != 'some12-+3@exa.mple.com'
- [FILTERED]@exa.mple.com
+ some12-+3@exa.mple.com


======================================================================
FAIL: test_does_not_filter_invalid_emails (test.PrivacyFilterTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-1q4zv2p/test.py", line 48, in test_does_not_filter_invalid_emails
    self.assertEqual(text, solution.PrivacyFilter(text).filtered())
AssertionError: 'Contact me here: _invalid@email.com' != 'Contact me here: _[EMAIL]'
- Contact me here: _invalid@email.com
+ Contact me here: _[EMAIL]


======================================================================
FAIL: test_does_not_filter_invalid_phone_numbers (test.PrivacyFilterTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-1q4zv2p/test.py", line 86, in test_does_not_filter_invalid_phone_numbers
    self.assertEqual(filtered, solution.PrivacyFilter(text).filtered())
AssertionError: '0005551234569' != '[PHONE]'
- 0005551234569
+ [PHONE]


======================================================================
FAIL: test_filters_more_complex_phone_numbers (test.PrivacyFilterTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-1q4zv2p/test.py", line 76, in test_filters_more_complex_phone_numbers
    self.assertEqual(filtered, solution.PrivacyFilter(text).filtered())
AssertionError: '[PHONE]' != '[PHONE]9'
- [PHONE]
+ [PHONE]9
?        +


======================================================================
FAIL: test_obfuscates_more_complicated_emails (test.PrivacyFilterTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-1q4zv2p/test.py", line 37, in test_obfuscates_more_complicated_emails
    self.assertEqual(filtered, solution.PrivacyFilter(text).filtered())
AssertionError: 'Contact: [EMAIL],[EMAIL]' != 'Contact: 1[EMAIL],[EMAIL]'
- Contact: [EMAIL],[EMAIL]
+ Contact: 1[EMAIL],[EMAIL]
?          +


======================================================================
FAIL: test_preserves_whitespace_around_phones (test.PrivacyFilterTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-1q4zv2p/test.py", line 89, in test_preserves_whitespace_around_phones
    self.assertEqual(' [PHONE] or...', solution.PrivacyFilter(' +359881212-12-1 2 or...').filtered())
AssertionError: ' [PHONE] or...' != ' [PHONE]-1 2 or...'
-  [PHONE] or...
+  [PHONE]-1 2 or...
?         ++++


======================================================================
FAIL: test_can_validate_more_complex_emails (test.ValidationsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-1q4zv2p/test.py", line 124, in test_can_validate_more_complex_emails
    self.assertIs(solution.Validations.is_email(email), valid)
AssertionError: False is not True

======================================================================
FAIL: test_can_validate_more_complex_phone_numbers (test.ValidationsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-1q4zv2p/test.py", line 160, in test_can_validate_more_complex_phone_numbers
    self.assertIs(solution.Validations.is_phone(phone), valid)
AssertionError: False is not True

======================================================================
FAIL: test_does_not_allow_zero_months_or_days_in_dates (test.ValidationsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-1q4zv2p/test.py", line 250, in test_does_not_allow_zero_months_or_days_in_dates
    self.assertFalse(solution.Validations.is_date('1000-01-00'))
AssertionError: True is not false

======================================================================
FAIL: test_validates_datetime_values (test.ValidationsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-1q4zv2p/test.py", line 282, in test_validates_datetime_values
    self.assertFalse(solution.Validations.is_datetime('2012-01-00T23:59:00'))
AssertionError: True is not false

======================================================================
FAIL: test_validates_hostnames (test.ValidationsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-1q4zv2p/test.py", line 169, in test_validates_hostnames
    self.assertTrue(solution.Validations.is_hostname('1.2.3.4.xip.io'))
AssertionError: False is not true

----------------------------------------------------------------------
Ran 39 tests in 0.044s

FAILED (failures=11)

История (1 версия и 0 коментара)

Стефан обнови решението на 23.04.2014 13:33 (преди над 10 години)

+import re
+
+
+class PrivacyFilter:
+ def __init__(self, text):
+ self.text = text
+ self.preserve_phone_country_code = False
+ self.preserve_email_hostname = False
+ self.partially_preserve_email_username = False
+
+ def filtered(self):
+ filtered_text = self.mail_replaced()
+ filtered_text = self.phone_replaced(filtered_text)
+ return filtered_text
+
+ def mail_replaced(self):
+ tld = '[a-zA-Z]{2,3}(?:[a-zA-Z]{2})?'
+ hostname = '@(?:\w[\w-]{,62}[^-]\.)*'
+ username = '[a-zA-Z][\w_+\.-]{,200}'
+ if self.partially_preserve_email_username:
+ emails = re.findall(username + hostname + tld, self.text)
+ result = self.text
+ for email in emails:
+ if len(re.findall(username, email)[0]) > 6:
+ next = '(?=' + str(email)[3:] + ')'
+ result = re.sub(next + username, '[FILTERED]', result)
+ else:
+ next = '(?=' + str(email) + ')'
+ result = re.sub(next + username, '[FILTERED]', result)
+ return result
+ elif self.preserve_email_hostname:
+ emails = re.findall(username + hostname + tld, self.text)
+ result = self.text
+ for email in emails:
+ next = '(?=' + str(email) + ')'
+ result = re.sub(next + username, '[FILTERED]', result)
+ return result
+ else:
+ return re.sub(username + hostname + tld, '[EMAIL]', self.text)
+
+ def phone_replaced(self, text):
+ prefix = '(?:00|0|\+)'
+ main_part = '(?:[\(\)\s-]{0,2}[0-9]){6,11}'
+ if self.preserve_phone_country_code:
+ phones = re.finditer(prefix + main_part, text)
+ result = text
+ return iterate_phones(phones, result)
+ else:
+ return re.sub(prefix + main_part, '[PHONE]', text)
+
+
+def iterate_phones(phones, result):
+ difference = 0
+ prefix = '(?:00|0|\+)'
+ main_part = '(?:[\(\)\s-]{0,2}[0-9]){6,11}'
+ for phone in phones:
+ current_prefix = re.match(prefix, phone.group()).group()
+ if current_prefix != '0':
+ code_expression = '(?!00|0|\+)[1-9][0-9]{1,2}'
+ country_code = re.search(code_expression, phone.group())
+ before_number = result[:(phone.start() - difference)]
+ after_number = result[(phone.end() - difference):]
+ filtered = current_prefix + country_code.group() + ' [FILTERED]'
+ difference += len(phone.group()) - len(filtered)
+ result = before_number + filtered + after_number
+ #print(result)
+ else:
+ before_number = result[:(phone.start() - difference)]
+ after_number = result[(phone.end() - difference):]
+ filtered = re.sub(prefix + main_part, '[PHONE]', phone.group())
+ difference += len(phone.group()) - len(filtered)
+ result = before_number + filtered + after_number
+ return result
+
+
+class Validations():
+ @classmethod
+ def is_email(cls, text):
+ tld = '[a-zA-Z]{2,3}(?:[a-zA-Z]{2})?'
+ hostname = '@(?:\w[\w-]{0,62}[^-]\.)+'
+ username = '[a-zA-Z][\w_\+\.-]{,200}'
+ matched = re.match(username + hostname + tld, text)
+ if matched is not None:
+ return text == matched.group()
+ return False
+
+ @classmethod
+ def is_phone(cls, text):
+ prefix = '(?:00|0|\+)'
+ #code_expression = '(?!00|0|\+)[1-9][0-9]{1,2}.+'
+ main_part = '(?:[\(\)\s-]{0,2}[1-9])(?:[\(\)\s-]{0,2}[0-9]){5,10}'
+ matched = re.match(prefix + main_part, text)
+ if matched is not None:
+ return text == matched.group()
+ return False
+
+ @classmethod
+ def is_hostname(cls, text):
+ hostname = '(?:\w[\w-]{0,62}[^-]\.)+'
+ tld = '[a-zA-Z]{2,3}(?:[a-zA-Z]{2})?'
+ matched = re.match(hostname + tld, text)
+ if matched is not None:
+ return text == matched.group()
+ return False
+
+ @classmethod
+ def is_ip_address(cls, text):
+ less_than_255 = '(2?([0-4]\d|5[0-5])|[01]?\d?\d)'
+ ip_segment = '(' + less_than_255 + '\.)'
+ ip_expression = ip_segment + '{3}' + less_than_255
+ matched = re.match(ip_expression, text)
+ if matched is not None:
+ return text == matched.group()
+ return False
+
+ @classmethod
+ def is_integer(cls, text):
+ matched = re.match('-?(0|\d+)', text)
+ if matched is not None:
+ return text == matched.group()
+ return False
+
+ @classmethod
+ def is_number(cls, text):
+ matched = re.match('-?(0|\d+)([\.,]\d+)?', text)
+ if matched is not None:
+ return text == matched.group()
+ return False
+
+ @classmethod
+ def is_date(cls, text):
+ month = '(0[1-9]|1[0-2])'
+ less_than_31 = '([0-2]\d|3[0-1])'
+ matched = re.match('[0-9]{4}-' + month + '-' + less_than_31, text)
+ if matched is not None:
+ return text == matched.group()
+ return False
+
+ @classmethod
+ def is_time(cls, text):
+ hours = '([0-1]\d|2[0-3])'
+ minutes = '([0-5][0-9])'
+ seconds = minutes
+ matched = re.match(hours + ':' + minutes + ':' + seconds, text)
+ if matched is not None:
+ return text == matched.group()
+ return False
+
+ @classmethod
+ def is_datetime(cls, text):
+ if(len(text) < 19):
+ return False
+ date = text[:10]
+ time = text[11:]
+ if text[10] != ' ' and text[10] != 'T':
+ return False
+ return Validations.is_date(date) and Validations.is_time(time)