Add a primop for regular expression pattern matching
The function ‘builtins.match’ takes a POSIX extended regular
expression and an arbitrary string. It returns ‘null’ if the string
does not match the regular expression. Otherwise, it returns a list
containing substring matches corresponding to parenthesis groups in
the regex. The regex must match the entire string (i.e. the pattern is
implicitly wrapped as "^(<pat>)$").  For example:
  match "foo" "foobar" => null
  match "foo" "foo" => []
  match "f(o+)(.*)" "foooobar" => ["oooo" "bar"]
  match "(.*/)?([^/]*)" "/dir/file.nix" => ["/dir/" "file.nix"]
  match "(.*/)?([^/]*)" "file.nix" => [null "file.nix"]
The following example finds all regular files with extension .nix or
.patch underneath the current directory:
  let
    findFiles = pat: dir: concatLists (mapAttrsToList (name: type:
      if type == "directory" then
        findFiles pat (dir + "/" + name)
      else if type == "regular" && match pat name != null then
        [(dir + "/" + name)]
      else []) (readDir dir));
  in findFiles ".*\\.(nix|patch)" (toString ./.)
			
			
This commit is contained in:
		
							parent
							
								
									4e340a983f
								
							
						
					
					
						commit
						976df480c9
					
				
					 5 changed files with 84 additions and 5 deletions
				
			
		|  | @ -1430,7 +1430,34 @@ static void prim_hashString(EvalState & state, const Pos & pos, Value * * args, | |||
|     string s = state.forceString(*args[1], context, pos); | ||||
| 
 | ||||
|     mkString(v, printHash(hashString(ht, s)), context); | ||||
| }; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* Match a regular expression against a string and return either
 | ||||
|    ‘null’ or a list containing substring matches. */ | ||||
| static void prim_match(EvalState & state, const Pos & pos, Value * * args, Value & v) | ||||
| { | ||||
|     Regex regex(state.forceStringNoCtx(*args[0], pos), true); | ||||
| 
 | ||||
|     PathSet context; | ||||
|     string s = state.forceString(*args[1], context, pos); | ||||
| 
 | ||||
|     Regex::Subs subs; | ||||
|     if (!regex.matches(s, subs)) { | ||||
|         mkNull(v); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     unsigned int len = subs.empty() ? 0 : subs.rbegin()->first + 1; | ||||
|     state.mkList(v, len); | ||||
|     for (unsigned int n = 0; n < len; ++n) { | ||||
|         auto i = subs.find(n); | ||||
|         if (i == subs.end()) | ||||
|             mkNull(*(v.list.elems[n] = state.allocValue())); | ||||
|         else | ||||
|             mkString(*(v.list.elems[n] = state.allocValue()), i->second); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /*************************************************************
 | ||||
|  | @ -1584,6 +1611,7 @@ void EvalState::createBaseEnv() | |||
|     addPrimOp("__unsafeDiscardStringContext", 1, prim_unsafeDiscardStringContext); | ||||
|     addPrimOp("__unsafeDiscardOutputDependency", 1, prim_unsafeDiscardOutputDependency); | ||||
|     addPrimOp("__hashString", 2, prim_hashString); | ||||
|     addPrimOp("__match", 2, prim_match); | ||||
| 
 | ||||
|     // Versions
 | ||||
|     addPrimOp("__parseDrvName", 1, prim_parseDrvName); | ||||
|  |  | |||
|  | @ -1,13 +1,16 @@ | |||
#include "regex.hh"
#include "types.hh"

#include <algorithm>
#include <vector>
| 
 | ||||
| namespace nix { | ||||
| 
 | ||||
| Regex::Regex(const string & pattern) | ||||
| Regex::Regex(const string & pattern, bool subs) | ||||
| { | ||||
|     /* Patterns must match the entire string. */ | ||||
|     int err = regcomp(&preg, ("^(" + pattern + ")$").c_str(), REG_NOSUB | REG_EXTENDED); | ||||
|     if (err) throw Error(format("compiling pattern ‘%1%’: %2%") % pattern % showError(err)); | ||||
|     int err = regcomp(&preg, ("^(" + pattern + ")$").c_str(), (subs ? 0 : REG_NOSUB) | REG_EXTENDED); | ||||
|     if (err) throw RegexError(format("compiling pattern ‘%1%’: %2%") % pattern % showError(err)); | ||||
|     nrParens = subs ? std::count(pattern.begin(), pattern.end(), '(') : 0; | ||||
| } | ||||
| 
 | ||||
| Regex::~Regex() | ||||
|  | @ -23,6 +26,20 @@ bool Regex::matches(const string & s) | |||
|     throw Error(format("matching string ‘%1%’: %2%") % s % showError(err)); | ||||
| } | ||||
| 
 | ||||
| bool Regex::matches(const string & s, Subs & subs) | ||||
| { | ||||
|     regmatch_t pmatch[nrParens + 2]; | ||||
|     int err = regexec(&preg, s.c_str(), nrParens + 2, pmatch, 0); | ||||
|     if (err == 0) { | ||||
|         for (unsigned int n = 2; n < nrParens + 2; ++n) | ||||
|             if (pmatch[n].rm_eo != -1) | ||||
|                 subs[n - 2] = string(s, pmatch[n].rm_so, pmatch[n].rm_eo - pmatch[n].rm_so); | ||||
|         return true; | ||||
|     } | ||||
|     else if (err == REG_NOMATCH) return false; | ||||
|     throw Error(format("matching string ‘%1%’: %2%") % s % showError(err)); | ||||
| } | ||||
| 
 | ||||
| string Regex::showError(int err) | ||||
| { | ||||
|     char buf[256]; | ||||
|  |  | |||
|  | @ -5,16 +5,23 @@ | |||
| #include <sys/types.h> | ||||
| #include <regex.h> | ||||
| 
 | ||||
| #include <map> | ||||
| 
 | ||||
| namespace nix { | ||||
| 
 | ||||
| MakeError(RegexError, Error) | ||||
| 
 | ||||
/* A compiled regular expression, held as a POSIX regex_t. */
class Regex
{
public:
    /* Compile ‘pattern’.  ‘subs’ must be true if the two-argument
       matches() is going to be used to retrieve group contents. */
    Regex(const string & pattern, bool subs = false);
    ~Regex();

    /* The object owns the compiled ‘preg’ and has a user-defined
       destructor; a member-wise copy would leave two objects sharing
       one compiled expression.  NOTE(review): the destructor body is
       not visible here, presumably it calls regfree() — confirm. */
    Regex(const Regex &) = delete;
    Regex & operator=(const Regex &) = delete;

    /* Whether ‘s’ matches the pattern. */
    bool matches(const string & s);

    /* Maps a zero-based group index to the text that group matched. */
    typedef std::map<unsigned int, string> Subs;

    /* Like matches(s), and on success additionally stores the text of
       the groups that took part in the match in ‘subs’. */
    bool matches(const string & s, Subs & subs);

private:
    /* Number of groups in the pattern for which sub-match slots are
       requested from regexec(); 0 when compiled without ‘subs’. */
    unsigned nrParens;
    regex_t preg;
    string showError(int err);
};
|  |  | |||
							
								
								
									
										1
									
								
								tests/lang/eval-okay-regex-match.exp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								tests/lang/eval-okay-regex-match.exp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1 @@ | |||
| true | ||||
							
								
								
									
										26
									
								
								tests/lang/eval-okay-regex-match.nix
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								tests/lang/eval-okay-regex-match.nix
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
with builtins;

let

  # Whether the regular expression ‘re’ matches the whole of ‘str’.
  isMatch = re: str: match re str != null;

  # ‘match’ applied to a file-name pattern only.  Its groups are: the
  # directory including the trailing slash, the directory without it,
  # the base name, and the extension (‘nix’ or ‘cc’).
  splitFN = match "((.*)/)?([^/]*)\\.(nix|cc)";

in

# Quantifiers, and the rule that the pattern must cover the entire string.
assert  isMatch "foobar" "foobar";
assert  isMatch "fo*" "f";
assert !isMatch "fo+" "f";
assert  isMatch "fo*" "fo";
assert  isMatch "fo*" "foo";
assert  isMatch "fo+" "foo";
assert  isMatch "fo{1,2}" "foo";
assert !isMatch "fo{1,2}" "fooo";
assert !isMatch "fo*" "foobar";

# A single group yields a one-element list.
assert match "(.*)\\.nix" "foobar.nix" == [ "foobar" ];

# Nested and optional groups; a group that takes no part in the match
# shows up as null.
assert splitFN "/path/to/foobar.nix" == [ "/path/to/" "/path/to" "foobar" "nix" ];
assert splitFN "foobar.cc" == [ null null "foobar" "cc" ];

true
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue