SpecRegex  0.5.0
A GPU-accelerated regex library
sp::regex::Regex< RegexStr, TuningParameters > Class Template Reference

A regex. More...

#include <Regex.hpp>

Public Types

enum class  ReplacementMode { SAFE , UNSAFE , INPLACE }
 
using Parsed = impl::ParseRegex< RegexStr >
 
using Match = RegexMatch< Parsed >
 
using Str = RegexStr
 

Static Public Member Functions

constexpr static const char * getCaptureGroupName (int i)
 
template<typename CharT >
static bool isCompleteMatch (const StringView< CharT > &str)
 Returns true iff the regular expression matches the entire target string. More...
 
template<typename CharT >
static Match getCompleteMatch (const StringView< CharT > &str)
 Returns a match iff the regular expression matches the entire target string. More...
 
template<typename CharT >
static bool hasPrefixMatch (StringView< CharT > str)
 Returns true iff the regular expression matches the beginning of the string. More...
 
template<typename CharT >
static Match matchPrefix (StringView< CharT > str)
 Returns the match starting at the beginning of the string. More...
 
template<typename CharT >
static __host__ bool hasMatch (StringView< CharT > str)
 Returns true iff the regular expression matches a substring of the string. More...
 
template<typename CharT >
static __host__ Match findFirst (StringView< CharT > str)
 Returns the first match of a substring in the string. More...
 
template<bool Noncapturing = false, typename CharT , typename MatchConsumer >
static __host__ void mapOverMatches (StringView< CharT > str, MatchConsumer mc)
 Map a function over the matches in a string. More...
 
template<typename CharT >
static __host__ std::vector< Match > findAll (StringView< CharT > str)
 Find all the matches of this regex in the string. More...
 
template<typename CharT >
static __host__ int findFirstN (StringView< CharT > str, Match *output, int n)
 Find the first n matches of this regex in the string (or all of the matches if there are fewer than n). More...
 
template<typename CharT , typename MatchInterpolator >
static __host__ int inplaceNonincreasingReplaceAllWithLambda (StringView< CharT > &str, MatchInterpolator mi)
 
template<ReplacementMode RM, typename CharT , typename MatchInterpolator >
static __host__ int replaceAllWithLambda (StringView< const CharT > str, StringView< CharT > &out, MatchInterpolator mi)
 
template<typename ReplacementString , ReplacementMode RM = ReplacementMode::SAFE, typename CharT >
static __host__ int replaceAll (StringView< const CharT > str, StringView< CharT > &out)
 Replace all matches of this regex in str with the pattern described by ReplacementString. More...
 
template<ReplacementMode RM = ReplacementMode::SAFE, typename CharT >
static __host__ int replaceAllWithLiteral (StringView< const CharT > str, StringView< const CharT > replacement, StringView< CharT > &out)
 Replace all matches of this regex in str with the literal string replacement. More...
 
template<typename CharT , typename MatchInterpolator , typename Allocator = decltype(arrayAllocator<CharT>), typename Deallocator = decltype(arrayDeleter<CharT>)>
static __host__ int inplaceAllocatingReplaceAllWithLambda (StringView< CharT > &buffer, int inputLength, MatchInterpolator mi, Allocator alloc=arrayAllocator< CharT >, Deallocator dealloc=arrayDeleter< CharT >)
 Perform an inplace replace-all which can allocate more memory if the output is too large, using a lambda to generate the replacements. More...
 
template<typename CharT , typename Allocator = decltype(arrayAllocator<CharT>), typename Deallocator = decltype(arrayDeleter<CharT>)>
static __host__ int inplaceAllocatingReplaceAllWithLiteral (StringView< CharT > &buffer, int inputLength, StringView< const CharT > replacement, Allocator alloc=arrayAllocator< CharT >, Deallocator dealloc=arrayDeleter< CharT >)
 Perform an inplace replace-all with a string literal, which can allocate more memory if the output is too large. More...
 
template<typename CharT , typename MatchInterpolator >
static __host__ int inplaceExpandingReplaceAllWithLambda (StringView< CharT > &buffer, int inputLength, MatchInterpolator mi)
 Perform an inplace replace-all which can use extra space in the buffer if the output needs to grow. More...
 

Static Public Attributes

constexpr static int NumCaptureGroups = Parsed::NumGroups
 
constexpr static uint64_t EnumMask = Parsed::getEnumMask()
 

Detailed Description

template<typename RegexStr, typename TuningParameters = RegexTuningParameters>
class sp::regex::Regex< RegexStr, TuningParameters >

A regex.

Use the REGEX_PATTERN macro to initialise the template parameter with your regex:

using catRegex = sp::regex::Regex<REGEX_PATTERN("ca+ts")>;
A regex.
Definition: Regex.hpp:56

The regex is parsed in the C++ template system and specialised code generated for it.

Complicated regexes may have a detrimental effect on compile time. You can mitigate this by spreading your regexes across more translation units.

Example

CPU-side string search, with extraction of submatches.

#include <spec/regex/Regex.hpp>
void testCPUFindAll(std::ostream &out)
{
// Initialise a multiline string:
sp::StringView<const char> EXAMPLE_STRING = R"(
Trains are cool.
Potatoes are cool.
Beige is definitely not cool.
)";
using coolRegex = sp::regex::Regex<REGEX_PATTERN("([a-zA-Z]+) (?:is|are) cool")>;
out << "The following things are cool:\n";
auto lambda = [&](const coolRegex::Match match) {
// Cut capture group 1 out of the input string.
sp::Vec<int, 2> matchBounds = match[1];
out << EXAMPLE_STRING.substr(matchBounds) << '\n';
return false;
};
coolRegex::mapOverMatches(EXAMPLE_STRING, lambda);
}
constexpr ThisType substr(IntT pos=0, IntT count=npos)
See also
sp::regex::GPURegexMatcher For how to use GPU matching.
Template Parameters
RegexStr: The regex to match.

Member Function Documentation

◆ findAll()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename CharT >
static __host__ std::vector< Match > sp::regex::Regex< RegexStr, TuningParameters >::findAll ( StringView< CharT >  str)
static

Find all the matches of this regex in the string.

Note
This routine is sequential: if you're looking for GPU-accelerated parallel search, use sp::regex::GPURegexMatcher.

This API materialises the matches in a std::vector for convenience. If you want to process the matches as they are produced (potentially stopping before calculating all of them), it's more efficient to use the functional map API, especially because it avoids the allocation overheads of std::vector.

Regex::mapOverMatches(someString, doSomething);
static __host__ void mapOverMatches(StringView< CharT > str, MatchConsumer mc)
Map a function over the matches in a string.
Definition: Regex.hpp:311

◆ findFirst()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename CharT >
static __host__ Match sp::regex::Regex< RegexStr, TuningParameters >::findFirst ( StringView< CharT >  str)
static

Returns the first match of a substring in the string.

◆ findFirstN()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename CharT >
static __host__ int sp::regex::Regex< RegexStr, TuningParameters >::findFirstN ( StringView< CharT >  str,
Match *  output,
int  n 
)
static

Find the first n matches of this regex in the string (or all of the matches if there are fewer than n).

This avoids the vector allocation overheads inherent in findAll.

Parameters
str: String to search.
output: Where to materialise the matches.
n: Maximum number of matches to return.
Returns
Actual number of matches returned.

◆ getCompleteMatch()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename CharT >
static Match sp::regex::Regex< RegexStr, TuningParameters >::getCompleteMatch ( const StringView< CharT > &  str)
static

Returns a match iff the regular expression matches the entire target string.

◆ hasMatch()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename CharT >
static __host__ bool sp::regex::Regex< RegexStr, TuningParameters >::hasMatch ( StringView< CharT >  str)
static

Returns true iff the regular expression matches a substring of the string.

◆ hasPrefixMatch()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename CharT >
static bool sp::regex::Regex< RegexStr, TuningParameters >::hasPrefixMatch ( StringView< CharT >  str)
static

Returns true iff the regular expression matches the beginning of the string.

◆ inplaceAllocatingReplaceAllWithLambda()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename CharT , typename MatchInterpolator , typename Allocator = decltype(arrayAllocator<CharT>), typename Deallocator = decltype(arrayDeleter<CharT>)>
static __host__ int sp::regex::Regex< RegexStr, TuningParameters >::inplaceAllocatingReplaceAllWithLambda ( StringView< CharT > &  buffer,
int  inputLength,
MatchInterpolator  mi,
Allocator  alloc = arrayAllocator<CharT>,
Deallocator  dealloc = arrayDeleter<CharT> 
)
static

Perform an inplace replace-all which can allocate more memory if the output is too large, using a lambda to generate the replacements.

Parameters
buffer: A StringView containing the input/output buffer on entry. The first inputLength characters of the buffer should be the input string. As a result, this buffer must be at least inputLength characters in length. The buffer StringView is a reference, and may be written to if a larger buffer must be allocated to complete the replacement operation. The size of the buffer upon completion is the size of the allocated buffer, which may be larger than the size of the output string.
inputLength: The number of characters that comprise the input.
mi: A function from input string view and match (sp::StringView<const CharT>, Match) to list of impl::ReplacementAction<CharT>. The list must be a range-for iterable type.
alloc: A function that allocates memory. It must accept a number of CharT to allocate and return a pointer to CharT. By default, new[] is used.
dealloc: A function that deallocates memory allocated with alloc, and can also deallocate the memory originally pointed to by buffer. It must accept a pointer to CharT. By default, delete[] is used.
Returns
The number of characters that comprise the output.

◆ inplaceAllocatingReplaceAllWithLiteral()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename CharT , typename Allocator = decltype(arrayAllocator<CharT>), typename Deallocator = decltype(arrayDeleter<CharT>)>
static __host__ int sp::regex::Regex< RegexStr, TuningParameters >::inplaceAllocatingReplaceAllWithLiteral ( StringView< CharT > &  buffer,
int  inputLength,
StringView< const CharT >  replacement,
Allocator  alloc = arrayAllocator<CharT>,
Deallocator  dealloc = arrayDeleter<CharT> 
)
static

Perform an inplace replace-all with a string literal, which can allocate more memory if the output is too large.

Most of the arguments to this function, as well as its return value, are the same as for inplaceAllocatingReplaceAllWithLambda().

Parameters
buffer
See also
inplaceAllocatingReplaceAllWithLambda
Parameters
inputLength
See also
inplaceAllocatingReplaceAllWithLambda
Parameters
replacement: The literal string to replace each match with.
alloc
See also
inplaceAllocatingReplaceAllWithLambda
Parameters
dealloc
See also
inplaceAllocatingReplaceAllWithLambda

◆ inplaceExpandingReplaceAllWithLambda()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename CharT , typename MatchInterpolator >
static __host__ int sp::regex::Regex< RegexStr, TuningParameters >::inplaceExpandingReplaceAllWithLambda ( StringView< CharT > &  buffer,
int  inputLength,
MatchInterpolator  mi 
)
static

Perform an inplace replace-all which can use extra space in the buffer if the output needs to grow.

TODO: Optimize this to avoid copies if possible.

Parameters
buffer: A StringView containing the input/output buffer. The first inputLength characters of the buffer should be the input string. As a result, this buffer must be at least inputLength characters in length. The size of this buffer is overwritten to be the output size.
inputLength: The number of characters that comprise the input.
mi: A function from input string view and match (sp::StringView<const CharT>, Match) to list of impl::ReplacementAction<CharT>. The list must be a range-for iterable type.
Returns
The number of characters that comprise the output. A negative number if the buffer was too small.

◆ isCompleteMatch()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename CharT >
static bool sp::regex::Regex< RegexStr, TuningParameters >::isCompleteMatch ( const StringView< CharT > &  str)
static

Returns true iff the regular expression matches the entire target string.

◆ mapOverMatches()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<bool Noncapturing = false, typename CharT , typename MatchConsumer >
static __host__ void sp::regex::Regex< RegexStr, TuningParameters >::mapOverMatches ( StringView< CharT >  str,
MatchConsumer  mc 
)
static

Map a function over the matches in a string.

The MatchConsumer function should return a boolean specifying whether to abort searching; this is useful if you only want to process the first n matches, for instance.

The MatchConsumer function will be evaluated sequentially on all matches in the string, terminating when it returns true or when all the matches in the string have been exhausted (whichever is first).

◆ matchPrefix()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename CharT >
static Match sp::regex::Regex< RegexStr, TuningParameters >::matchPrefix ( StringView< CharT >  str)
static

Returns the match starting at the beginning of the string.

◆ replaceAll()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<typename ReplacementString , ReplacementMode RM = ReplacementMode::SAFE, typename CharT >
static __host__ int sp::regex::Regex< RegexStr, TuningParameters >::replaceAll ( StringView< const CharT >  str,
StringView< CharT > &  out 
)
static

Replace all matches of this regex in str with the pattern described by ReplacementString.

The input and output StringViews must be disjoint.

ReplacementString can use most of the language features for regex replacement strings, documented here.

An overload for this API is provided to allow specification of ReplacementMode or PrefixFilterDepth or neither.

Template Parameters
ReplacementString: A replacement pattern specification.
RM: The ReplacementMode: SAFE, UNSAFE, or INPLACE. Only set to UNSAFE if the output buffer is known to be sufficient. Defaults to SAFE.
Parameters
str: String to search.
out: Memory to overwrite with the output. This must be sufficiently large. This view will have its size reduced to represent the output string.
Returns
The number of characters that would be in the output string if the buffer were large enough.

◆ replaceAllWithLiteral()

template<typename RegexStr , typename TuningParameters = RegexTuningParameters>
template<ReplacementMode RM = ReplacementMode::SAFE, typename CharT >
static __host__ int sp::regex::Regex< RegexStr, TuningParameters >::replaceAllWithLiteral ( StringView< const CharT >  str,
StringView< const CharT >  replacement,
StringView< CharT > &  out 
)
static

Replace all matches of this regex in str with the literal string replacement.

The input and output StringViews must be disjoint, as must the replacement and output StringViews.

Parameters
str: String to search.
replacement: The string to replace matches with.
out: Memory to overwrite with the output. This must be sufficiently large. This view will have its size reduced to represent the output string.