REGULAR EXPRESSION EXAMPLES BY NEEDS

Passwords

//Password complexity
//Tests if the input consists of 6 or more letters, digits, underscores and hyphens.
//The input must contain at least one upper case letter, one lower case letter and one digit.
‘\A(?=[-_a-zA-Z0-9]*?[A-Z])(?=[-_a-zA-Z0-9]*?[a-z])(?=[-_a-zA-Z0-9]*?[0-9])[-_a-zA-Z0-9]{6,}\z’

//Password complexity
//Tests if the input consists of 6 or more non-whitespace characters.
//The input must contain at least one upper case letter, one lower case letter and one digit.
‘\A(?=\S*?[A-Z])(?=\S*?[a-z])(?=\S*?[0-9])\S{6,}\z’

 

File paths
//Path: Windows
‘\b[a-z]:\\[^/:*?"<>|\r\n]*’

//Path: Windows
//Different elements of the path are captured into backreferences.
‘\b((?#drive)[a-z]):\\((?#folder)[^/:*?"<>|\r\n]*\\)?((?#file)[^\\/:*?"<>|\r\n]*)’

//Path: Windows or UNC
‘(?:(?#drive)\b[a-z]:|\\\\[a-z0-9]+)\\[^/:*?"<>|\r\n]*’

//Path: Windows or UNC
//Different elements of the path are captured into backreferences.
‘((?#drive)\b[a-z]:|\\\\[a-z0-9]+)\\((?#folder)[^/:*?"<>|\r\n]*\\)?((?#file)[^\\/:*?"<>|\r\n]*)’

 

Phone numbers
//Phone Number (North America)
//Matches 3334445555, 333.444.5555, 333-444-5555, 333 444 5555, (333) 444 5555 and all combinations thereof.
//Replaces all those with (333) 444-5555
preg_replace(‘\(?([0-9]{3})\)?[-. ]?([0-9]{3})[-. ]?([0-9]{4})’, ‘(\1) \2-\3’, $text);

//Phone Number (North America)
//Matches 3334445555, 333.444.5555, 333-444-5555, 333 444 5555, (333) 444 5555 and all combinations thereof.
‘\(?[0-9]{3}\)?[-. ]?[0-9]{3}[-. ]?[0-9]{4}’

 

Postal codes
//Postal code (Canada)
‘\b[ABCEGHJKLMNPRSTVXY][0-9][A-Z] [0-9][A-Z][0-9]\b’

//Postal code (UK)
‘\b[A-Z]{1,2}[0-9][A-Z0-9]? [0-9][ABD-HJLNP-UW-Z]{2}\b’

 

Programming
//Programming: # comment
//Single-line comment started by # anywhere on the line
‘#.*$’

//Programming: # preprocessor statement
//Started by # at the start of the line, possibly preceded by some whitespace.
‘^\s*#.*$’

//Programming: /* comment */
//Does not match nested comments.  Most languages, including C, Java, C#, etc.
//do not allow comments to be nested.  I.e. the first */ closes the comment.
‘/\*.*?\*/’

//Programming: // comment
//Single-line comment started by // anywhere on the line
‘//.*$’

//Programming: GUID
//Microsoft-style GUID, bare value only (no enclosing parentheses or braces).
‘[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}’

//Programming: GUID
//Microsoft-style GUID, with optional parentheses or braces.
//(Long version, if your regex flavor doesn’t support conditionals.)
‘[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}|\([A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}\)|\{[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}\}’

//Programming: GUID
//Microsoft-style GUID, with optional parentheses or braces.
//Short version, illustrating the use of regex conditionals.  Not all regex flavors support conditionals.
//Also, when applied to large chunks of data, the regex using conditionals will likely be slower
//than the long version.  Straight alternation is much easier to optimize for a regex engine.
‘(?:(\()|(\{))?[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}(?(1)\))(?(2)\})’

//Programming: Remove escapes
//Remove backslashes used to escape other characters
preg_replace(‘\\(.)’, ‘\1’, $text);

//Programming: String
//Quotes may appear in the string when escaped with a backslash.
//The string may span multiple lines.
‘"[^"\\]*(?:\\.[^"\\]*)*"’

//Programming: String
//Quotes may appear in the string when escaped with a backslash.
//The string cannot span multiple lines.
‘"[^"\\\r\n]*(?:\\.[^"\\\r\n]*)*"’

//Programming: String
//Quotes may not appear in the string.  The string cannot span multiple lines.
‘"[^"\r\n]*"’

 

Quotes
//Quotes: Replace smart double quotes with straight double quotes.
//ANSI version for use with 8-bit regex engines and the Windows code page 1252.
preg_replace(‘[\x84\x93\x94]’, ‘"’, $text);

//Quotes: Replace smart double quotes with straight double quotes.
//Unicode version for use with Unicode regex engines.
preg_replace(‘[\u201C\u201D\u201E\u201F\u2033\u2036]’, ‘"’, $text);

//Quotes: Replace smart single quotes and apostrophes with straight single quotes.
//Unicode version for use with Unicode regex engines.
preg_replace("[\u2018\u2019\u201A\u201B\u2032\u2035]", "'", $text);

//Quotes: Replace smart single quotes and apostrophes with straight single quotes.
//ANSI version for use with 8-bit regex engines and the Windows code page 1252.
preg_replace("[\x82\x91\x92]", "'", $text);

//Quotes: Replace straight apostrophes with smart apostrophes
preg_replace("\b'\b", "’", $text);

//Quotes: Replace straight double quotes with smart double quotes.
//ANSI version for use with 8-bit regex engines and the Windows code page 1252.
preg_replace('\B"\b([^"\x84\x93\x94\r\n]+)\b"\B', '“\1”', $text);

//Quotes: Replace straight double quotes with smart double quotes.
//Unicode version for use with Unicode regex engines.
preg_replace('\B"\b([^"\u201C\u201D\u201E\u201F\u2033\u2036\r\n]+)\b"\B', '“\1”', $text);

//Quotes: Replace straight single quotes with smart single quotes.
//Unicode version for use with Unicode regex engines.
preg_replace("\B'\b([^'\u2018\u2019\u201A\u201B\u2032\u2035\r\n]+)\b'\B", "‘\1’", $text);

//Quotes: Replace straight single quotes with smart single quotes.
//ANSI version for use with 8-bit regex engines and the Windows code page 1252.
preg_replace("\B'\b([^'\x82\x91\x92\r\n]+)\b'\B", "‘\1’", $text);

 

Escape
//Regex: Escape metacharacters
//Place a backslash in front of the regular expression metacharacters
preg_replace("[][{}()*+?.\\^$|]", "\\$0", $text);

 

Security
//Security: ASCII control characters excl. tab and CRLF
//Matches any single non-printable control character that may cause trouble in certain situations.
//Excludes tabs and line breaks.
‘[\x00-\x08\x0B\x0C\x0E-\x1F]’

//Security: ASCII control characters incl. tab and CRLF
//Matches any single non-printable control character that may cause trouble in certain situations.
//Includes tabs and line breaks.
‘[\x00-\x1F]’

//Security: Escape quotes and backslashes
//E.g. escape user input before inserting it into a SQL statement
preg_replace(‘["'\\]’, ‘\\$0’, $text);

//Security: Unicode control and unassigned characters excl. tab and CRLF
//Matches any single non-printable control character that may cause trouble in certain situations.
//Also matches any Unicode code point that is unused in the current Unicode standard,
//and thus should not occur in text as it cannot be displayed.
//Excludes tabs and line breaks.
‘[^\P{C}\t\r\n]’

//Security: Unicode control and unassigned characters incl. tab and CRLF
//Matches any single non-printable control character that may cause trouble in certain situations.
//Also matches any Unicode code point that is unused in the current Unicode standard,
//and thus should not occur in text as it cannot be displayed.
//Includes tabs and line breaks.
‘\p{C}’

//Security: Unicode control characters excl. tab and CRLF
//Matches any single non-printable control character that may cause trouble in certain situations.
//Excludes tabs and line breaks.
‘[^\P{Cc}\t\r\n]’

//Security: Unicode control characters incl. tab and CRLF
//Matches any single non-printable control character that may cause trouble in certain situations.
//Includes tabs and line breaks.
‘\p{Cc}’

 

SSN (Social security numbers)
//Social security number (US)
‘\b[0-9]{3}-[0-9]{2}-[0-9]{4}\b’

 

Trim
//Trim whitespace (including line breaks) at the end of the string
preg_replace("\s+\z", "", $text);

//Trim whitespace (including line breaks) at the start and the end of the string
preg_replace("\A\s+|\s+\z", "", $text);

//Trim whitespace (including line breaks) at the start of the string
preg_replace("\A\s+", "", $text);

//Trim whitespace at the end of each line
preg_replace("[ \t]+$", "", $text);

//Trim whitespace at the start and the end of each line
preg_replace("^[ \t]+|[ \t]+$", "", $text);

//Trim whitespace at the start of each line
preg_replace("^[ \t]+", "", $text);

 

URLs
//URL: Different URL parts
//Protocol, domain name, page and CGI parameters are captured into backreferences 1 through 4
‘\b((?#protocol)https?|ftp)://((?#domain)[-A-Z0-9.]+)((?#file)/[-A-Z0-9+&@#/%=~_|!:,.;]*)?((?#parameters)\?[-A-Z0-9+&@#/%=~_|!:,.;]*)?’

//URL: Different URL parts
//Protocol, domain name, page and CGI parameters are captured into named capturing groups.
//Works as it is with .NET, and after conversion by RegexBuddy on the Use page with Python, PHP/preg and PCRE.
‘\b(?<protocol>https?|ftp)://(?<domain>[-A-Z0-9.]+)(?<file>/[-A-Z0-9+&@#/%=~_|!:,.;]*)?(?<parameters>\?[-A-Z0-9+&@#/%=~_|!:,.;]*)?’

//URL: Find in full text
//The final character class makes sure that if a URL is part of some text, punctuation such as a
//comma or full stop after the URL is not interpreted as part of the URL.
‘\b(https?|ftp|file)://[-A-Z0-9+&@#/%?=~_|!:,.;]*[-A-Z0-9+&@#/%=~_|]’

//URL: Replace URLs with HTML links
preg_replace(‘\b(https?|ftp|file)://[-A-Z0-9+&@#/%?=~_|!:,.;]*[-A-Z0-9+&@#/%=~_|]’, ‘<a href="\0">\0</a>’, $text);

 

Words
//Words: Any word NOT matching a particular regex
//This regex will match all words that cannot be matched by %REGEX%.
//Explanation: Observe that the negative lookahead and the \w are repeated together, one character at a time.
//This makes sure we test that %REGEX% fails at EVERY position in the word, and not just at any particular position.
‘\b(?:(?!%REGEX%)\w)+\b’

//Words: Delete repeated words
//Find any word that occurs twice or more in a row.
//Delete all occurrences except the first.
preg_replace(‘\b(\w+)(?:\s+\1\b)+’, ‘\1’, $text);

//Words: Near, any order
//Matches word1 and word2, or vice versa, separated by at least 1 and at most 3 words
‘\b(?:word1(?:\W+\w+){1,3}\W+word2|word2(?:\W+\w+){1,3}\W+word1)\b’

//Words: Near, list
//Matches any pair of words out of the list word1, word2, word3, separated by at least 1 and at most 6 words
‘\b(word1|word2|word3)(?:\W+\w+){1,6}\W+(word1|word2|word3)\b’

//Words: Near, ordered
//Matches word1 and word2, in that order, separated by at least 1 and at most 3 words
‘\bword1(?:\W+\w+){1,3}\W+word2\b’

//Words: Repeated words
//Find any word that occurs twice or more in a row.
‘\b(\w+)\s+\1\b’

//Words: Whole word
‘\b%WORD%\b’

//Words: Whole word
//Match one of the words from the list
‘\b(?:word1|word2|word3)\b’

//Words: Whole word at the end of a line
//Whitespace permitted after the word
‘\b%WORD%\s*$’

//Words: Whole word at the end of a line
‘\b%WORD%$’

//Words: Whole word at the start of a line
‘^%WORD%\b’

//Words: Whole word at the start of a line
//Whitespace permitted before the word
‘^\s*%WORD%\b’

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s