Решение на Регулярни изрази от Атанас Пройчев

Обратно към всички решения

Към профила на Атанас Пройчев

Резултати

  • 7 точки от тестове
  • 0 бонус точки
  • 7 точки общо
  • 29 успешни тест(а)
  • 10 неуспешни тест(а)

Код

import re
class PrivacyFilter:
EMAIL_PATTERN = (r'\b(?P<username>'
r'(?P<first3>[a-zA-Z0-9][a-zA-Z0-9_+.-]{2})'
r'[a-zA-Z0-9_+.-]{,198})@(?P<host>\S+)\b')
PHONE_PATTERN = (r'(0[^0]|(?P<code>(00|\+)[1-9]\d{,2}))'
r'([- \(\)]{,2}\d){5,10}[^- \(\)]\b')
def __init__(self, text):
self.text = text
self.preserve_phone_country_code = False
self.preserve_email_hostname = False
self.partially_preserve_email_username = False
def filtered(self):
filtered_text = re.sub(self.EMAIL_PATTERN, self.filter_email,
self.text)
return re.sub(self.PHONE_PATTERN, self.filter_phone,
filtered_text)
def filter_email(self, matchobj):
if not Validations.is_hostname(matchobj.group('host')):
return matchobj.group(0)
if self.preserve_email_hostname:
return "[FILTERED]@{}".format(matchobj.group('host'))
if self.partially_preserve_email_username:
if len(matchobj.group('username')) < 6:
return "[FILTERED]@{}".format(matchobj.group('host'))
else:
return "{}[FILTERED]@{}".format(matchobj.group('first3'),
matchobj.group('host'))
return "[EMAIL]"
def filter_phone(self, matchobj):
if self.preserve_phone_country_code:
code = matchobj.group('code')
if code and len(code) > 0:
return "{} [FILTERED]".format(matchobj.group('code'))
return "[PHONE]"
class Validations:
@classmethod
def is_email(cls, text):
pattern = r'^[a-zA-Z0-9][a-zA-Z0-9_+.-]{,200}@(.+)$'
match = re.search(pattern, text)
if match is None:
return False
else:
return cls.is_hostname(match.groups()[0])
@classmethod
def is_hostname(cls, text):
pattern = (r'^([0-9a-zA-Z][0-9a-zA-Z-]{,62}[^-])+\.[a-zA-Z]{2,3}'
r'(\.[a-zA-Z]{2})?$')
match = re.search(pattern, text)
return match is not None
@classmethod
def is_phone(cls, text):
pattern = (r'^(0[^0]|(00|\+)[1-9]\d{,2})'
r'([- \(\)]{,2}\d){5,10}[^- \(\)]$')
match = re.search(pattern, text)
return match is not None
@classmethod
def is_ip_address(cls, text):
pattern = r'^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$'
match = re.search(pattern, text)
if match is None:
return False
else:
return all([int(number) >= 0 and int(number) < 256
for number in match.groups()])
@classmethod
def is_integer(cls, text, part=False):
if part:
pattern = r'^\d*$'
else:
pattern = r'^-?[1-9]\d*$'
pattern_zero = r'^-?0$'
if re.search(pattern_zero, text) is None:
match = re.search(pattern, text)
else:
return True
return match is not None
@classmethod
def is_number(cls, text):
pattern = r'^(-?\d+)(\.(\d+))?$'
match = re.search(pattern, text)
if match is None:
return False
else:
if match.group(3) is None:
return cls.is_integer(match.group(1))
else:
return (cls.is_integer(match.group(1))
and cls.is_integer(match.group(3), True))
@classmethod
def is_date(cls, text):
pattern = r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})$'
match = re.search(pattern, text)
if match is None:
return False
else:
return (1 <= int(match.group('month')) <= 12
and 1 <= int(match.group('day')) <= 31)
@classmethod
def is_time(cls, text):
pattern = r'^(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})$'
match = re.search(pattern, text)
if match is None:
return False
else:
return (0 <= int(match.group('hour')) <= 23
and 0 <= int(match.group('min')) <= 59
and 0 <= int(match.group('sec')) <= 59)
@classmethod
def is_datetime(cls, text):
pattern = r'^(?P<date>.+)[T| ](?P<time>.+)$'
match = re.search(pattern, text)
if match is None:
return False
else:
return (cls.is_date(match.group('date'))
and cls.is_time(match.group('time')))

Лог от изпълнението

..F.F.FF.F............FF..FF....F......
======================================================================
FAIL: test_does_not_brake_with_unicode (test.PrivacyFilterTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-yceqn9/test.py", line 64, in test_does_not_brake_with_unicode
    self.assertEqual('За връзка: [FILTERED]@example.com', self.partially_filter_email_usernames('За връзка: me@example.com'))
AssertionError: 'За връзка: [FILTERED]@example.com' != 'За връзка: me@example.com'
- За връзка: [FILTERED]@example.com
?            ^^^^^^^^^^
+ За връзка: me@example.com
?            ^^


======================================================================
FAIL: test_does_not_filter_invalid_phone_numbers (test.PrivacyFilterTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-yceqn9/test.py", line 86, in test_does_not_filter_invalid_phone_numbers
    self.assertEqual(filtered, solution.PrivacyFilter(text).filtered())
AssertionError: '0005551234569' != '0[PHONE]'
- 0005551234569
+ 0[PHONE]


======================================================================
FAIL: test_filters_whole_email_usernames_if_too_short (test.PrivacyFilterTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-yceqn9/test.py", line 61, in test_filters_whole_email_usernames_if_too_short
    self.assertEqual('[FILTERED]@example.com', self.partially_filter_email_usernames('me@example.com'))
AssertionError: '[FILTERED]@example.com' != 'me@example.com'
- [FILTERED]@example.com
+ me@example.com


======================================================================
FAIL: test_obfuscates_more_complicated_emails (test.PrivacyFilterTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-yceqn9/test.py", line 37, in test_obfuscates_more_complicated_emails
    self.assertEqual(filtered, solution.PrivacyFilter(text).filtered())
AssertionError: 'Contact: [EMAIL],[EMAIL]' != 'Contact: [EMAIL]'
- Contact: [EMAIL],[EMAIL]
?          --------
+ Contact: [EMAIL]


======================================================================
FAIL: test_preserves_whitespace_around_phones (test.PrivacyFilterTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-yceqn9/test.py", line 89, in test_preserves_whitespace_around_phones
    self.assertEqual(' [PHONE] or...', solution.PrivacyFilter(' +359881212-12-1 2 or...').filtered())
AssertionError: ' [PHONE] or...' != ' [PHONE]-1 2 or...'
-  [PHONE] or...
+  [PHONE]-1 2 or...
?         ++++


======================================================================
FAIL: test_handles_multiline_strings_in_IP_validation_properly (test.ValidationsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-yceqn9/test.py", line 189, in test_handles_multiline_strings_in_IP_validation_properly
    self.assertFalse(solution.Validations.is_ip_address("8.8.8.8\n"))
AssertionError: True is not false

======================================================================
FAIL: test_handles_multiline_strings_in_hostname_validation_properly (test.ValidationsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-yceqn9/test.py", line 179, in test_handles_multiline_strings_in_hostname_validation_properly
    self.assertFalse(solution.Validations.is_hostname("foo.com\n"))
AssertionError: True is not false

======================================================================
FAIL: test_handles_newlines_in_date_validation (test.ValidationsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-yceqn9/test.py", line 259, in test_handles_newlines_in_date_validation
    self.assertFalse(solution.Validations.is_date("2012-11-19\n"))
AssertionError: True is not false

======================================================================
FAIL: test_handles_newlines_in_time_and_datetime_validation (test.ValidationsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-yceqn9/test.py", line 288, in test_handles_newlines_in_time_and_datetime_validation
    self.assertFalse(solution.Validations.is_time("12:01:01\n"))
AssertionError: True is not false

======================================================================
FAIL: test_validates_hostnames (test.ValidationsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "lib/language/python/runner.py", line 60, in thread
    raise it.exc_info[1]
  File "lib/language/python/runner.py", line 48, in run
    self.result = func(*args, **kwargs)
  File "/tmp/d20140513-11348-yceqn9/test.py", line 170, in test_validates_hostnames
    self.assertTrue(solution.Validations.is_hostname('x.io'))
AssertionError: False is not true

----------------------------------------------------------------------
Ran 39 tests in 0.047s

FAILED (failures=10)

История (1 версия и 0 коментара)

Атанас обнови решението на 23.04.2014 12:57 (преди над 10 години)

+import re
+
+
+class PrivacyFilter:
+ EMAIL_PATTERN = (r'\b(?P<username>'
+ r'(?P<first3>[a-zA-Z0-9][a-zA-Z0-9_+.-]{2})'
+ r'[a-zA-Z0-9_+.-]{,198})@(?P<host>\S+)\b')
+
+ PHONE_PATTERN = (r'(0[^0]|(?P<code>(00|\+)[1-9]\d{,2}))'
+ r'([- \(\)]{,2}\d){5,10}[^- \(\)]\b')
+
+ def __init__(self, text):
+ self.text = text
+ self.preserve_phone_country_code = False
+ self.preserve_email_hostname = False
+ self.partially_preserve_email_username = False
+
+ def filtered(self):
+ filtered_text = re.sub(self.EMAIL_PATTERN, self.filter_email,
+ self.text)
+ return re.sub(self.PHONE_PATTERN, self.filter_phone,
+ filtered_text)
+
+ def filter_email(self, matchobj):
+ if not Validations.is_hostname(matchobj.group('host')):
+ return matchobj.group(0)
+ if self.preserve_email_hostname:
+ return "[FILTERED]@{}".format(matchobj.group('host'))
+ if self.partially_preserve_email_username:
+ if len(matchobj.group('username')) < 6:
+ return "[FILTERED]@{}".format(matchobj.group('host'))
+ else:
+ return "{}[FILTERED]@{}".format(matchobj.group('first3'),
+ matchobj.group('host'))
+
+ return "[EMAIL]"
+
+ def filter_phone(self, matchobj):
+ if self.preserve_phone_country_code:
+ code = matchobj.group('code')
+ if code and len(code) > 0:
+ return "{} [FILTERED]".format(matchobj.group('code'))
+
+ return "[PHONE]"
+
+
+class Validations:
+ @classmethod
+ def is_email(cls, text):
+ pattern = r'^[a-zA-Z0-9][a-zA-Z0-9_+.-]{,200}@(.+)$'
+ match = re.search(pattern, text)
+ if match is None:
+ return False
+ else:
+ return cls.is_hostname(match.groups()[0])
+
+ @classmethod
+ def is_hostname(cls, text):
+ pattern = (r'^([0-9a-zA-Z][0-9a-zA-Z-]{,62}[^-])+\.[a-zA-Z]{2,3}'
+ r'(\.[a-zA-Z]{2})?$')
+ match = re.search(pattern, text)
+
+ return match is not None
+
+ @classmethod
+ def is_phone(cls, text):
+ pattern = (r'^(0[^0]|(00|\+)[1-9]\d{,2})'
+ r'([- \(\)]{,2}\d){5,10}[^- \(\)]$')
+ match = re.search(pattern, text)
+
+ return match is not None
+
+ @classmethod
+ def is_ip_address(cls, text):
+ pattern = r'^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$'
+ match = re.search(pattern, text)
+ if match is None:
+ return False
+ else:
+ return all([int(number) >= 0 and int(number) < 256
+ for number in match.groups()])
+
+ @classmethod
+ def is_integer(cls, text, part=False):
+ if part:
+ pattern = r'^\d*$'
+ else:
+ pattern = r'^-?[1-9]\d*$'
+ pattern_zero = r'^-?0$'
+ if re.search(pattern_zero, text) is None:
+ match = re.search(pattern, text)
+ else:
+ return True
+
+ return match is not None
+
+ @classmethod
+ def is_number(cls, text):
+ pattern = r'^(-?\d+)(\.(\d+))?$'
+ match = re.search(pattern, text)
+ if match is None:
+ return False
+ else:
+ if match.group(3) is None:
+ return cls.is_integer(match.group(1))
+ else:
+ return (cls.is_integer(match.group(1))
+ and cls.is_integer(match.group(3), True))
+
+ @classmethod
+ def is_date(cls, text):
+ pattern = r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})$'
+ match = re.search(pattern, text)
+ if match is None:
+ return False
+ else:
+ return (1 <= int(match.group('month')) <= 12
+ and 1 <= int(match.group('day')) <= 31)
+
+ @classmethod
+ def is_time(cls, text):
+ pattern = r'^(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})$'
+ match = re.search(pattern, text)
+ if match is None:
+ return False
+ else:
+ return (0 <= int(match.group('hour')) <= 23
+ and 0 <= int(match.group('min')) <= 59
+ and 0 <= int(match.group('sec')) <= 59)
+
+ @classmethod
+ def is_datetime(cls, text):
+ pattern = r'^(?P<date>.+)[T| ](?P<time>.+)$'
+ match = re.search(pattern, text)
+ if match is None:
+ return False
+ else:
+ return (cls.is_date(match.group('date'))
+ and cls.is_time(match.group('time')))