1.findFirstIn findFirstMatchIn


  /** Contrasts `findFirstIn` with `findFirstMatchIn` on the same input.
   *
   *  `findFirstIn` yields the matched text as an `Option[String]`, while
   *  `findFirstMatchIn` yields an `Option` wrapping a match object whose
   *  runtime class name is printed to standard output.
   */
  def test() = {
    val s = "你好今天是2021年1月2日18点30分"
    val pattern = """今天是\d+年\d+月\d+日""".r
    // Matched text only.
    val result1 = pattern.findFirstIn(s)
    // Match object: print its runtime type, then read the text of group 0.
    val result2 = pattern.findFirstMatchIn(s) match {
      case Some(data) =>
        println("data type is: " + data.getClass.getSimpleName)
        data.group(0)
      case _ => "empty"
    }
  }


data type is: Match


  /** Return an optional first matching string of this `Regex` in the given character sequence,
   *  or None if there is no match.
   *
   *  @param source The text to match against.
   *  @return       An [[scala.Option]] of the first matching string in the text.
   *  @example      {{{"""\w+""".r findFirstIn "A simple example." foreach println // prints "A"}}}
   */
  def findFirstIn(source: CharSequence): Option[String] = {
    // `pattern` is declared outside this excerpt; a new matcher is created for every call.
    val m = pattern.matcher(source)
    if (m.find) Some(m.group) else None
  }


  /** Return an optional first match of this `Regex` in the given character sequence,
   *  or None if it does not exist.
   *
   *  If the match is successful, the [[scala.util.matching.Regex.Match]] can be queried for
   *  more data.
   *
   *  @param source The text to match against.
   *  @return       A [[scala.Option]] of [[scala.util.matching.Regex.Match]] of the first matching string in the text.
   *  @example      {{{("""[a-z]""".r findFirstMatchIn "A simple example.") map (_.start) // returns Some(2), the index of the first match in the text}}}
   */
  def findFirstMatchIn(source: CharSequence): Option[Match] = {
    // `pattern` and `groupNames` are declared outside this excerpt.
    val m = pattern.matcher(source)
    // Unlike findFirstIn, the matcher is wrapped in a Match rather than reduced to a String.
    if (m.find) Some(new Match(source, m, groupNames)) else None
  }


2.Match MatchData


  /** Provides information about a successful match.
   *
   *  @param source     The character sequence the match was found in.
   *  @param matcher    The matcher that start/end offsets and the group count are read from.
   *  @param groupNames The group names passed in by the creator of this match.
   */
  class Match(val source: CharSequence,
              private[matching] val matcher: Matcher,
              val groupNames: Seq[String]) extends MatchData {

    /** The index of the first matched character. */
    val start = matcher.start

    /** The index following the last matched character. */
    val end = matcher.end

    /** The number of subgroups. */
    def groupCount = matcher.groupCount

    // Per-group offsets for groups 0..groupCount, read from the matcher on first access.
    private lazy val starts: Array[Int] =
      ((0 to groupCount) map matcher.start).toArray
    private lazy val ends: Array[Int] =
      ((0 to groupCount) map matcher.end).toArray

    /** The index of the first matched character in group `i`. */
    def start(i: Int) = starts(i)

    /** The index following the last matched character in group `i`. */
    def end(i: Int) = ends(i)

    /** The match itself with matcher-dependent lazy vals forced,
     *  so that match is valid even once matcher is advanced.
     */
    def force: this.type = { starts; ends; this }
  }

第一行注释非常关键，它告诉了我们 Match 类最重要的作用：Provides information about a successful match。也就是说，如果匹配成功，这个类会给我们提供一些与本次匹配相关的信息，包括匹配成功的起始位置等。

 /** Read-only view of one match: the source text, the group names and
  *  accessors for the matched groups.
  *
  *  NOTE: this is an abridged excerpt. `end(i)` and `nameToIndex`, both used
  *  below, are declared in a part of the trait that is not shown here.
  */
 trait MatchData {

    /** The source from which the match originated */
    val source: CharSequence

    /** The names of the groups, or an empty sequence if none defined */
    val groupNames: Seq[String]

    /** The number of capturing groups in the pattern.
     *  (For a given successful match, some of those groups may not have matched any input.)
     */
    def groupCount: Int

    /** The index of the first matched character, or -1 if nothing was matched */
    def start: Int

    /** The index of the first matched character in group `i`,
     *  or -1 if nothing was matched for that group.
     */
    def start(i: Int): Int

    /** The matched string in group `i`,
     *  or `null` if nothing was matched.
     */
    def group(i: Int): String =
      // A negative start offset marks a group that did not take part in the match.
      if (start(i) >= 0) source.subSequence(start(i), end(i)).toString
      else null

    /** Returns the group with given name.
     *
     *  @param id The group name
     *  @return   The requested group
     *  @throws   NoSuchElementException if the requested group name is not defined
     */
    def group(id: String): String = nameToIndex.get(id) match {
      case None => throw new NoSuchElementException("group name "+id+" not defined")
      case Some(index) => group(index)
    }
 }



  /** Extracts the first capturing group (the year) from the first match and prints it.
   *
   *  Prints "-1" when the pattern does not occur in the input.
   */
  def test() = {
    val s = "你好今天是2021年1月2日18点30分"
    val pattern = """今天是(\d+)年(\d+)月(\d+)日""".r
    val result = pattern.findFirstMatchIn(s)
    // Group 1 is the first parenthesised sub-pattern, i.e. the year digits.
    val year = result.map(_.group(1)).getOrElse("-1")
    println(year)  // prints 2021
  }




  /** Shows the failure mode of using a plain `Regex` as an extractor.
   *
   *  The pattern only covers part of `s`, and the extractor in the `val`
   *  definition does not succeed on it, so this method throws
   *  `scala.MatchError` before anything is printed. That failure is the
   *  point of the example.
   *
   *  @throws scala.MatchError always, for the input used here
   */
  def test() = {
    val s = "你好今天是2021年1月2日18点30分"
    val pattern = """今天是(\d+)年(\d+)月(\d+)日""".r
    // Extractor pattern: binds year/month/day only if the extractor succeeds on `s`.
    val pattern(year, month, day) = s
    println(s"year is $year.\n" +
      f"month is $month.\n" + raw"day is $day")
  }


scala.MatchError: 你好今天是2021年1月2日18点30分 (of class java.lang.String)

    at com.xiaomi.mifi.pdata.common.T4.t8(T4.scala:114)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)

val pattern(year, month, day) = s

  /** Extractor used when a `Regex` appears in a pattern position.
   *
   *  @param s The text to match; `null` never matches.
   *  @return  `Some` list holding groups 1 to `groupCount` (group 0 is not included)
   *           when `runMatcher` reports success, otherwise `None`.
   */
  def unapplySeq(s: CharSequence): Option[List[String]] = s match {
    case null => None
    case _    =>
      val m = pattern matcher s
      // `runMatcher` is defined outside this excerpt; it decides whether the match counts.
      if (runMatcher(m)) Some((1 to m.groupCount).toList map m.group)
      else None
  }


  /** Tries to match a [[java.lang.CharSequence]].
   *  If the match succeeds, the result is a list of the matching
   *  groups (or a `null` element if a group did not match any input).
   *  If the pattern specifies no groups, then the result will be an empty list
   *  on a successful match.
   *  This method attempts to match the entire input by default; to find the next
   *  matching subsequence, use an unanchored `Regex`.
   */



  /** Destructures year, month and day with an unanchored `Regex` extractor.
   *
   *  Calling `.unanchored` lets the extractor succeed on a sub-sequence of
   *  the input, so the three groups are bound and printed one per line.
   */
  def test() = {
    val s = "你好今天是2021年1月2日18点30分"
    val pattern = """今天是(\d+)年(\d+)月(\d+)日""".r.unanchored
    // Binds the three capturing groups, in order.
    val pattern(year, month, day) = s
    println(s"year is $year.\n" +
      f"month is $month.\n" + raw"day is $day")
  }


year is 2021.
month is 1.
day is 2

5.findAllIn findAllMatchIn


  /** Collects every date in the sample text with `findAllIn` and prints the result.
   *
   *  Two lines are written: first the iterator's own `toString` (the
   *  accompanying article shows "non-empty iterator"; the exact text depends
   *  on the Scala version), then the matched strings separated by two spaces.
   */
  def t9() = {
    val dateRegex =  """(\d{4})-(\d{2})-(\d{2})""".r
    val dates = "dates in history: 2004-01-20, 2005-02-28, 1998-01-15, 2009-10-25"
    val result =  dateRegex.findAllIn(dates)
    // Despite the name, this is an iterator of matched strings, not an Array.
    val array =  for (each <- result) yield each
    println(array)
    // An iterator can be traversed once; mkString consumes it here.
    println(array.mkString("  "))
  }
non-empty iterator
2004-01-20  2005-02-28  1998-01-15  2009-10-25


  /** Return all non-overlapping matches of this `Regex` in the given character
   *  sequence as a [[scala.util.matching.Regex.MatchIterator]],
   *  which is a special [[scala.collection.Iterator]] that returns the
   *  matched strings but can also be queried for more data about the last match,
   *  such as capturing groups and start position.
   *
   *  @param source The text to match against.
   *  @return       A [[scala.util.matching.Regex.MatchIterator]] over the matches.
   */
  def findAllIn(source: CharSequence) = new Regex.MatchIterator(source, this, groupNames)

findAllIn 返回的是一个 MatchIterator。根据注释信息可以看出，MatchIterator 是 scala.collection.Iterator 的一个特例，所以直接 println(array) 得到的信息是 non-empty iterator。


  /** Collects the year (group 1) of every date with `findAllMatchIn` and prints the result.
   *
   *  Two lines are written: first the iterator's own `toString` (the
   *  accompanying article shows "non-empty iterator"; the exact text depends
   *  on the Scala version), then the years separated by four spaces.
   */
  def t10() = {
    val dateRegex =  """(\d{4})-(\d{2})-(\d{2})""".r
    val dates = "dates in history: 2004-01-20, 2005-02-28, 1998-01-15, 2009-10-25"
    val result = dateRegex.findAllMatchIn(dates)
    // Each element is a match object, so individual groups can be read from it.
    val array = for(each <- result) yield each.group(1)
    println(array)
    // An iterator can be traversed once; mkString consumes it here.
    println(array.mkString("    "))
  }


non-empty iterator
2004    2005    1998    2009