Files
aho_corasick
ansi_term
arrayvec
atty
backtrace
backtrace_sys
base64
bincode
bitflags
byteorder
bytes
c2_chacha
capnp
capnp_futures
capnp_rpc
cfg_if
chrono
clap
crossbeam_deque
crossbeam_epoch
crossbeam_queue
crossbeam_utils
ctrlc
daemon
failure
failure_derive
flexi_logger
fnv
futures
getrandom
glob
hid_io
api
device
module
protocol
hidapi
install_service
iovec
lazy_static
libc
lock_api
log
memchr
memoffset
mio
mio_uds
nanoid
net2
nix
nodrop
num_cpus
num_integer
num_traits
open
parking_lot
parking_lot_core
pem
ppv_lite86
proc_macro2
quote
rand
rand_chacha
rand_core
rand_hc
rand_isaac
rand_jitter
rand_os
rand_pcg
rand_xorshift
rcgen
regex
regex_syntax
remove_dir_all
ring
rustc_demangle
rustls
scoped_tls
scopeguard
sct
serde
slab
smallvec
spin
stream_cancel
strsim
syn
synstructure
tempfile
textwrap
thread_local
time
tokio
tokio_codec
tokio_core
tokio_current_thread
tokio_executor
tokio_fs
tokio_io
tokio_reactor
tokio_rustls
tokio_sync
tokio_tcp
tokio_threadpool
tokio_timer
tokio_udp
tokio_uds
unicode_width
unicode_xid
untrusted
vec_map
void
webpki
windows_service
x11
xcb
xkbcommon
yasna
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
use std::cmp;
use std::io;
use std::ptr;

/// The default buffer capacity that we use for the stream buffer.
const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB

/// A fairly simple roll buffer for supporting stream searches.
///
/// This buffer acts as a temporary place to store a fixed amount of data when
/// reading from a stream. Its central purpose is to allow "rolling" some
/// suffix of the data to the beginning of the buffer before refilling it with
/// more data from the stream. For example, let's say we are trying to match
/// "foobar" on a stream. When we report the match, we'd like to not only
/// report the correct offsets at which the match occurs, but also the matching
/// bytes themselves. So let's say our stream is a file with the following
/// contents: `test test foobar test test`. Now assume that we happen to read
/// the aforementioned file in two chunks: `test test foo` and `bar test test`.
/// Naively, it would not be possible to report a single contiguous `foobar`
/// match, but this roll buffer allows us to do that. Namely, after the second
/// read, the contents of the buffer should be `st foobar test test`, where the
/// search should ultimately resume immediately after `foo`. (The prefix `st `
/// is included because the roll buffer saves N bytes at the end of the buffer,
/// where N is the maximum possible length of a match.)
///
/// A lot of the logic for dealing with this is unfortunately split out between
/// this roll buffer and the `StreamChunkIter`.
#[derive(Debug)]
pub struct Buffer {
    /// The raw buffer contents. This has a fixed size and never increases.
    buf: Vec<u8>,
    /// The minimum size of the buffer, which is equivalent to the maximum
    /// possible length of a match. This corresponds to the amount that we
    /// roll
    min: usize,
    /// The end of the contents of this buffer.
    end: usize,
}

impl Buffer {
    /// Create a new buffer for stream searching. The minimum buffer length
    /// given should be the size of the maximum possible match length.
    pub fn new(min_buffer_len: usize) -> Buffer {
        let min = cmp::max(1, min_buffer_len);
        // The minimum buffer amount is also the amount that we roll our
        // buffer in order to support incremental searching. To this end,
        // our actual capacity needs to be at least 1 byte bigger than our
        // minimum amount, otherwise we won't have any overlap. In actuality,
        // we want our buffer to be a bit bigger than that for performance
        // reasons, so we set a lower bound of `8 * min`.
        //
        // TODO: It would be good to find a way to test the streaming
        // implementation with the minimal buffer size.
        let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
        Buffer { buf: vec![0; capacity], min, end: 0 }
    }

    /// Return the contents of this buffer.
    #[inline]
    pub fn buffer(&self) -> &[u8] {
        &self.buf[..self.end]
    }

    /// Return the minimum size of the buffer. The only way a buffer may be
    /// smaller than this is if the stream itself contains less than the
    /// minimum buffer amount.
    #[inline]
    pub fn min_buffer_len(&self) -> usize {
        self.min
    }

    /// Return the total length of the contents in the buffer.
    #[inline]
    pub fn len(&self) -> usize {
        self.end
    }

    /// Return all free capacity in this buffer.
    fn free_buffer(&mut self) -> &mut [u8] {
        &mut self.buf[self.end..]
    }

    /// Refill the contents of this buffer by reading as much as possible into
    /// this buffer's free capacity. If no more bytes could be read, then this
    /// returns false. Otherwise, this reads until it has filled the buffer
    /// past the minimum amount.
    pub fn fill<R: io::Read>(&mut self, mut rdr: R) -> io::Result<bool> {
        let mut readany = false;
        loop {
            let readlen = rdr.read(self.free_buffer())?;
            if readlen == 0 {
                return Ok(readany);
            }
            readany = true;
            self.end += readlen;
            if self.len() >= self.min {
                return Ok(true);
            }
        }
    }

    /// Roll the contents of the buffer so that the suffix of this buffer is
    /// moved to the front and all other contents are dropped. The size of the
    /// suffix corresponds precisely to the minimum buffer length.
    ///
    /// This should only be called when the entire contents of this buffer have
    /// been searched.
    pub fn roll(&mut self) {
        let roll_start = self
            .end
            .checked_sub(self.min)
            .expect("buffer capacity should be bigger than minimum amount");
        let roll_len = self.min;

        assert!(roll_start + roll_len <= self.end);
        unsafe {
            // SAFETY: A buffer contains Copy data, so there's no problem
            // moving it around. Safety also depends on our indices being in
            // bounds, which they always should be, given the assert above.
            ptr::copy(
                self.buf[roll_start..].as_ptr(),
                self.buf.as_mut_ptr(),
                roll_len,
            );
        }
        self.end = roll_len;
    }
}