Files
aho_corasick
ansi_term
arrayvec
atty
backtrace
backtrace_sys
base64
bincode
bitflags
byteorder
bytes
c2_chacha
capnp
capnp_futures
capnp_rpc
cfg_if
chrono
clap
crossbeam_deque
crossbeam_epoch
crossbeam_queue
crossbeam_utils
ctrlc
daemon
failure
failure_derive
flexi_logger
fnv
futures
getrandom
glob
hid_io
api
device
module
protocol
hidapi
install_service
iovec
lazy_static
libc
lock_api
log
memchr
memoffset
mio
mio_uds
nanoid
net2
nix
nodrop
num_cpus
num_integer
num_traits
open
parking_lot
parking_lot_core
pem
ppv_lite86
proc_macro2
quote
rand
rand_chacha
rand_core
rand_hc
rand_isaac
rand_jitter
rand_os
rand_pcg
rand_xorshift
rcgen
regex
regex_syntax
remove_dir_all
ring
rustc_demangle
rustls
scoped_tls
scopeguard
sct
serde
slab
smallvec
spin
stream_cancel
strsim
syn
synstructure
tempfile
textwrap
thread_local
time
tokio
tokio_codec
tokio_core
tokio_current_thread
tokio_executor
tokio_fs
tokio_io
tokio_reactor
tokio_rustls
tokio_sync
tokio_tcp
tokio_threadpool
tokio_timer
tokio_udp
tokio_uds
unicode_width
unicode_xid
untrusted
vec_map
void
webpki
windows_service
x11
xcb
xkbcommon
yasna
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
/// Slot is a single saved capture location. Note that there are two slots for
/// every capture in a regular expression (one slot each for the start and end
/// of the capture).
pub type Slot = Option<usize>;

/// Locations represents the offsets of each capturing group in a regex for
/// a single match.
///
/// Unlike `Captures`, a `Locations` value only stores offsets.
#[doc(hidden)]
#[derive(Clone, Debug)]
pub struct Locations(Vec<Slot>);

impl Locations {
    /// Returns the start and end positions of the Nth capture group. Returns
    /// `None` if `i` is not a valid capture group or if the capture group did
    /// not match anything. The positions returned are *always* byte indices
    /// with respect to the original string matched.
    pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
        let (s, e) = (i * 2, i * 2 + 1);
        match (self.0.get(s), self.0.get(e)) {
            (Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
            _ => None,
        }
    }

    /// Creates an iterator of all the capture group positions in order of
    /// appearance in the regular expression. Positions are byte indices
    /// in terms of the original string matched.
    pub fn iter(&self) -> SubCapturesPosIter {
        SubCapturesPosIter { idx: 0, locs: self }
    }

    /// Returns the total number of capturing groups.
    ///
    /// This is always at least `1` since every regex has at least `1`
    /// capturing group that corresponds to the entire match.
    pub fn len(&self) -> usize {
        self.0.len() / 2
    }

    /// Return the individual slots as a slice.
    pub(crate) fn as_slots(&mut self) -> &mut [Slot] {
        &mut self.0
    }
}

/// An iterator over capture group positions for a particular match of a
/// regular expression.
///
/// Positions are byte indices in terms of the original string matched.
///
/// `'c` is the lifetime of the captures.
pub struct SubCapturesPosIter<'c> {
    idx: usize,
    locs: &'c Locations,
}

impl<'c> Iterator for SubCapturesPosIter<'c> {
    type Item = Option<(usize, usize)>;

    fn next(&mut self) -> Option<Option<(usize, usize)>> {
        if self.idx >= self.locs.len() {
            return None;
        }
        let x = match self.locs.pos(self.idx) {
            None => Some(None),
            Some((s, e)) => Some(Some((s, e))),
        };
        self.idx += 1;
        x
    }
}

/// `RegularExpression` describes types that can implement regex searching.
///
/// This trait is my attempt at reducing code duplication and to standardize
/// the internal API. Specific duplication that is avoided are the `find`
/// and `capture` iterators, which are slightly tricky.
///
/// It's not clear whether this trait is worth it, and it also isn't
/// clear whether it's useful as a public trait or not. Methods like
/// `next_after_empty` reak of bad design, but the rest of the methods seem
/// somewhat reasonable. One particular thing this trait would expose would be
/// the ability to start the search of a regex anywhere in a haystack, which
/// isn't possible in the current public API.
pub trait RegularExpression: Sized {
    /// The type of the haystack.
    type Text: ?Sized;

    /// The number of capture slots in the compiled regular expression. This is
    /// always two times the number of capture groups (two slots per group).
    fn slots_len(&self) -> usize;

    /// Allocates fresh space for all capturing groups in this regex.
    fn locations(&self) -> Locations {
        Locations(vec![None; self.slots_len()])
    }

    /// Returns the position of the next character after `i`.
    ///
    /// For example, a haystack with type `&[u8]` probably returns `i+1`,
    /// whereas a haystack with type `&str` probably returns `i` plus the
    /// length of the next UTF-8 sequence.
    fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;

    /// Returns the location of the shortest match.
    fn shortest_match_at(
        &self,
        text: &Self::Text,
        start: usize,
    ) -> Option<usize>;

    /// Returns whether the regex matches the text given.
    fn is_match_at(&self, text: &Self::Text, start: usize) -> bool;

    /// Returns the leftmost-first match location if one exists.
    fn find_at(
        &self,
        text: &Self::Text,
        start: usize,
    ) -> Option<(usize, usize)>;

    /// Returns the leftmost-first match location if one exists, and also
    /// fills in any matching capture slot locations.
    fn captures_read_at(
        &self,
        locs: &mut Locations,
        text: &Self::Text,
        start: usize,
    ) -> Option<(usize, usize)>;

    /// Returns an iterator over all non-overlapping successive leftmost-first
    /// matches.
    fn find_iter(self, text: &Self::Text) -> Matches<Self> {
        Matches { re: self, text: text, last_end: 0, last_match: None }
    }

    /// Returns an iterator over all non-overlapping successive leftmost-first
    /// matches with captures.
    fn captures_iter(self, text: &Self::Text) -> CaptureMatches<Self> {
        CaptureMatches(self.find_iter(text))
    }
}

/// An iterator over all non-overlapping successive leftmost-first matches.
pub struct Matches<'t, R>
where
    R: RegularExpression,
    R::Text: 't,
{
    re: R,
    text: &'t R::Text,
    last_end: usize,
    last_match: Option<usize>,
}

impl<'t, R> Matches<'t, R>
where
    R: RegularExpression,
    R::Text: 't,
{
    /// Return the text being searched.
    pub fn text(&self) -> &'t R::Text {
        self.text
    }

    /// Return the underlying regex.
    pub fn regex(&self) -> &R {
        &self.re
    }
}

impl<'t, R> Iterator for Matches<'t, R>
where
    R: RegularExpression,
    R::Text: 't + AsRef<[u8]>,
{
    type Item = (usize, usize);

    fn next(&mut self) -> Option<(usize, usize)> {
        if self.last_end > self.text.as_ref().len() {
            return None;
        }
        let (s, e) = match self.re.find_at(self.text, self.last_end) {
            None => return None,
            Some((s, e)) => (s, e),
        };
        if s == e {
            // This is an empty match. To ensure we make progress, start
            // the next search at the smallest possible starting position
            // of the next match following this one.
            self.last_end = self.re.next_after_empty(self.text, e);
            // Don't accept empty matches immediately following a match.
            // Just move on to the next match.
            if Some(e) == self.last_match {
                return self.next();
            }
        } else {
            self.last_end = e;
        }
        self.last_match = Some(e);
        Some((s, e))
    }
}

/// An iterator over all non-overlapping successive leftmost-first matches with
/// captures.
pub struct CaptureMatches<'t, R>(Matches<'t, R>)
where
    R: RegularExpression,
    R::Text: 't;

impl<'t, R> CaptureMatches<'t, R>
where
    R: RegularExpression,
    R::Text: 't,
{
    /// Return the text being searched.
    pub fn text(&self) -> &'t R::Text {
        self.0.text()
    }

    /// Return the underlying regex.
    pub fn regex(&self) -> &R {
        self.0.regex()
    }
}

impl<'t, R> Iterator for CaptureMatches<'t, R>
where
    R: RegularExpression,
    R::Text: 't + AsRef<[u8]>,
{
    type Item = Locations;

    fn next(&mut self) -> Option<Locations> {
        if self.0.last_end > self.0.text.as_ref().len() {
            return None;
        }
        let mut locs = self.0.re.locations();
        let (s, e) = match self.0.re.captures_read_at(
            &mut locs,
            self.0.text,
            self.0.last_end,
        ) {
            None => return None,
            Some((s, e)) => (s, e),
        };
        if s == e {
            self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
            if Some(e) == self.0.last_match {
                return self.next();
            }
        } else {
            self.0.last_end = e;
        }
        self.0.last_match = Some(e);
        Some(locs)
    }
}