Notes

JavaScript - Regular Expression notes

author: Paul Kim

categories: js, regex

tags: js, regex

create a regular expression

two ways to create a regular expression:

  • regex literal
  • regex constructor function
// regex literal
var re = /ab+c/

// regex constructor function
var re = new RegExp('ab+c')

var re2 = /[a-z]\s/i
var re2 = new RegExp('[a-z]\\s', 'i')

var re3 = /Chapter (\d+)\.\d*/
var re3 = new RegExp('Chapter (\\d+)\\.\\d*')

var re4 = /[a-z]:\\/i
var re4 = new RegExp('[a-z]:\\\\', 'i')

a simple regular expression pattern

var str1 = 'test'
var str2 = 'testest'
var str3 = 'testtest'
var str4 = 'test test'
var str5 = 'this is a test'
var str6 = 'test in progress'

// contains "test"
var re1 = /test/
var re2 = new RegExp('test')

console.log(re1.test(str1)) // true
console.log(re1.test(str2)) // true
console.log(re1.test(str3)) // true
console.log(re1.test(str4)) // true
console.log(re1.test(str5)) // true
console.log(re1.test(str6)) // true

console.log(re2.test(str1)) // true
console.log(re2.test(str2)) // true
console.log(re2.test(str3)) // true
console.log(re2.test(str4)) // true
console.log(re2.test(str5)) // true
console.log(re2.test(str6)) // true

boundaries

boundaries indicate the beginnings and endings of lines and words.

^ matches the beginning of the string

if the m multiline flag is set to true, also matches immediately after a line break character

// starts with "test"
var re1 = /^test/
var re2 = new RegExp('^test')

var str1 = 'this is a test'
console.log(re1.test(str1)) // false
console.log(re2.test(str1)) // false

var str2 = 'test in progress'
console.log(re1.test(str2)) // true
console.log(re2.test(str2)) // true

$ matches the end of the string

if the m multiline flag is set to true, also matches immediately before a line break character

// ends with "test"
var re1 = /test$/
var re2 = new RegExp('test$')

var str1 = 'this is a test'
console.log(re1.test(str1)) // true
console.log(re2.test(str1)) // true

var str2 = 'test in progress'
console.log(re1.test(str2)) // false
console.log(re2.test(str2)) // false

^ and $ used together match the whole string, from its beginning to its end

// starts and ends with "test"
var re1 = /^test$/
var re2 = new RegExp('^test$')

var str1 = 'this is a test'
console.log(re1.test(str1)) // false
console.log(re2.test(str1)) // false

var str2 = 'test in progress'
console.log(re1.test(str2)) // false
console.log(re2.test(str2)) // false

var str3 = 'test'
console.log(re1.test(str3)) // true
console.log(re2.test(str3)) // true

\b matches a word boundary

var str = 'moon'
console.log(/\bm/.test(str)) // true; matches the 'm' in 'moon'
console.log(/oo\b/.test(str)) // false; does not match the 'oo' in 'moon'
console.log(/oon\b/.test(str)) // true; matches the 'oon' in 'moon'
console.log(/\w\b\w/.test(str)) // false; will never match anything

Note: [\b] is used to match a backspace character

\B matches a non-word boundary (a position where the previous and next characters are of the same type: either both must be word characters, or both must be non-word characters)

console.log(/\Bon/.test('at noon')) // true; matches 'on' in 'at noon'
console.log(/ye\B/.test('possibly yesterday')) // true; matches 'ye' in 'possibly yesterday'

general overview example

// Using regex boundaries (^, $, \b, \B) to fix a buggy multi-line string.
// Flags used on every pattern: g replaces every match, i ignores case, and
// m makes ^ and $ match at the start and end of each line instead of only
// at the start and end of the whole string.
let buggyMultiline = `tey, ihe light-greon thi apple
tangs on ihe greon traa`

// 1) if a line starts with 't', replace that 't' with 'h'
buggyMultiline = buggyMultiline.replace(/^t/gim, 'h')
console.log(1, buggyMultiline) // 'tey' -> 'hey' and 'tangs' -> 'hangs'

// 2) if a line ends with 'aa', replace that 'aa' with 'ee.'
buggyMultiline = buggyMultiline.replace(/aa$/gim, 'ee.')
console.log(2, buggyMultiline) // 'traa' -> 'tree.'

// 3) if a word starts with 'i', replace that 'i' with 't'
buggyMultiline = buggyMultiline.replace(/\bi/gim, 't')
console.log(3, buggyMultiline) // 'ihe' -> 'the'

// 4) if a word ends with 'i', replace that 'i' with 'e'
buggyMultiline = buggyMultiline.replace(/i\b/gim, 'e')
console.log(4, buggyMultiline) // 'thi' -> 'the'

// 5) if an 'o' is not the first character of a word, replace it with 'e'
//    (\B before the 'o' only rules out the start of a word, so the 'o' that
//    begins 'on' is left alone)
const fixedMultiline = buggyMultiline.replace(/\Bo/gim, 'e')
console.log(5, fixedMultiline) // 'greon' -> 'green'

starting with 'A'

let fruits = ['Apple', 'Watermelon', 'Orange', 'Avocado', 'Strawberry']
let fruitsStartsWithA = fruits.filter((fruit) => /^A/.test(fruit))
console.log(fruitsStartsWithA) // [ 'Apple', 'Avocado' ]

not starting with 'A'

let fruits = ['Apple', 'Watermelon', 'Orange', 'Avocado', 'Strawberry']
let fruitsStartsWithNotA = fruits.filter((fruit) => /^[^A]/.test(fruit))
console.log(fruitsStartsWithNotA) // [ 'Watermelon', 'Orange', 'Strawberry' ]

quantifiers

  • * - 0 or more (greedy)
  • + - 1 or more (greedy)
  • *? - 0 or more (lazy)
  • +? - 1 or more (lazy)
console.log(RegExp('w.+f', 'g').test('wf')) // false

console.log(RegExp('w.*f', 'g').test('wf')) // true

flags

flag description
g global - find all, not just the first
i case-insensitive
m multi-line
s dotall - allow . to match newlines
u unicode
y sticky

miscellaneous

split by one or more whitespace characters

// Splitting on a single space keeps an empty string for every extra space
// between the words.
'btn    -btn    btn-'.split(' ')
// ["btn", "", "", "", "-btn", "", "", "", "btn-"]

// Splitting on /\s+/ treats each run of whitespace as one separator.
'btn    -btn    btn-'.split(/\s+/)
// ["btn", "-btn", "btn-"]

split string by whitespace or comma

'foo, bar baz, bazaar'.split(/[ ,]+/)
// ["foo", "bar", "baz", "bazaar"]
'foo,   bar  baz   ,        bazaar'.split(/[ ,]+/)
// ["foo", "bar", "baz", "bazaar"]

'foo, bar baz, bazaar'.split(/,?\s+/)
// ["foo", "bar", "baz", "bazaar"]
'foo,   bar  baz   ,        bazaar'.split(/,?\s+/).filter(Boolean)
// ["foo", "bar", "baz", "bazaar"]

replace everything after http://example.com/path1/ with *

let url = `http://example.com/path1/path2?abcd`

// use sticky flag y (but IE doesn't support it)
let foo = url.replace(/(^https?:\/\/.*?\/path1\/?|(?!^))./gy, '$1*')
console.log(foo) // http://example.com/path1/**********

foo = url.replace(/^(https?:\/\/.+\/path1\/?)(.*)/, function (_, m1, m2) {
  return m1 + '*'.repeat(m2.length)
})
console.log(foo) // http://example.com/path1/**********

replace everything before @ with *s

let email = `person@example.com`
console.log(email.replace(/.(?=.*@)/g, '*')) // ******@example.com

convert 'hi-there-bob' to 'hiThereBob'

'hi-there-bob'.replace(/-(\w)/g, function (g) {
  return g[1].toUpperCase()
})
// or
'hi-there-bob'.replace(/-[a-z]/g, function (g) {
  return g[1].toUpperCase()
})
// or
'hi-there-bob'.replace(/\b-+(\w)/g, (_, char) => char.toUpperCase())

using a function with String.prototype.replace

// capture 'btn-' as p1, capture 0 or more word characters as p2
// then format it as |p1|p2
'btn-'.replace(/^(btn-)([\w]*)/, (m, p1, p2) => `|${p1}|${p2}`) // "|btn-|"
'btn-red'.replace(/^(btn-)([\w]*)/, (m, p1, p2) => `|${p1}|${p2}`) // "|btn-|red"

// capture 'btn-' as p1, capture 1 or more word characters as p2
// then format it as |p1|p2
'btn-'.replace(/^(btn-)([\w]+)/, (m, p1, p2) => `|${p1}|${p2}`) // "btn-" <- notice it did not replace because regex didn't match
'btn-red'.replace(/^(btn-)([\w]+)/, (m, p1, p2) => `|${p1}|${p2}`) // "|btn-|red"
Guide
Reference

ChromeEdgeFirefoxOpera

© 2021 paulkode.com. All rights reserved.