// This module defines pure Rust platform independent implementations of all
// the memchr routines. We do our best to make them fast. Some of them may even
// get auto-vectorized.

use core::cmp;
use core::ptr;
use core::usize;

#[cfg(target_pointer_width = "32")]
const USIZE_BYTES: usize = 4;

#[cfg(target_pointer_width = "64")]
const USIZE_BYTES: usize = 8;

// The number of bytes to loop at in one iteration of memchr/memrchr.
const LOOP_SIZE: usize = 2 * USIZE_BYTES;

/// Return `true` if `x` contains any zero byte.
///
/// From *Matters Computational*, J. Arndt
///
/// "The idea is to subtract one from each of the bytes and then look for
/// bytes where the borrow propagated all the way to the most significant
/// bit."
#[inline(always)]
fn contains_zero_byte(x: usize) -> bool {
    const LO_U64: u64 = 0x0101010101010101;
    const HI_U64: u64 = 0x8080808080808080;

    const LO_USIZE: usize = LO_U64 as usize;
    const HI_USIZE: usize = HI_U64 as usize;

    x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
}

/// Repeat the given byte into a word size number. That is, every 8 bits
/// is equivalent to the given byte. For example, if `b` is `\x4E` or
/// `01001110` in binary, then the returned value on a 32-bit system would be:
/// `01001110_01001110_01001110_01001110`.
#[inline(always)]
fn repeat_byte(b: u8) -> usize {
    (b as usize) * (usize::MAX / 255)
}

pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let confirm = |byte| byte == n1;
    let loop_size = cmp::min(LOOP_SIZE, haystack.len());
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();
    let end_ptr = haystack[haystack.len()..].as_ptr();
    let mut ptr = start_ptr;

    unsafe {
        if haystack.len() < USIZE_BYTES {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

        let chunk = read_unaligned_usize(ptr);
        if contains_zero_byte(chunk ^ vn1) {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

        ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
        debug_assert!(ptr > start_ptr);
        debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
        while loop_size == LOOP_SIZE && ptr <= ptr_sub(end_ptr, loop_size) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let a = *(ptr as *const usize);
            let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
            let eqa = contains_zero_byte(a ^ vn1);
            let eqb = contains_zero_byte(b ^ vn1);
            if eqa || eqb {
                break;
            }
            ptr = ptr_add(ptr, LOOP_SIZE);
        }
        forward_search(start_ptr, end_ptr, ptr, confirm)
    }
}

/// Like `memchr`, but searches for two bytes instead of one.
pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let vn2 = repeat_byte(n2);
    let confirm = |byte| byte == n1 || byte == n2;
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();
    let end_ptr = haystack[haystack.len()..].as_ptr();
    let mut ptr = start_ptr;

    unsafe {
        if haystack.len() < USIZE_BYTES {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

        let chunk = read_unaligned_usize(ptr);
        let eq1 = contains_zero_byte(chunk ^ vn1);
        let eq2 = contains_zero_byte(chunk ^ vn2);
        if eq1 || eq2 {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

        ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
        debug_assert!(ptr > start_ptr);
        debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
        while ptr <= ptr_sub(end_ptr, USIZE_BYTES) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let chunk = *(ptr as *const usize);
            let eq1 = contains_zero_byte(chunk ^ vn1);
            let eq2 = contains_zero_byte(chunk ^ vn2);
            if eq1 || eq2 {
                break;
            }
            ptr = ptr_add(ptr, USIZE_BYTES);
        }
        forward_search(start_ptr, end_ptr, ptr, confirm)
    }
}

/// Like `memchr`, but searches for three bytes instead of one.
pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let vn2 = repeat_byte(n2);
    let vn3 = repeat_byte(n3);
    let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();
    let end_ptr = haystack[haystack.len()..].as_ptr();
    let mut ptr = start_ptr;

    unsafe {
        if haystack.len() < USIZE_BYTES {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

        let chunk = read_unaligned_usize(ptr);
        let eq1 = contains_zero_byte(chunk ^ vn1);
        let eq2 = contains_zero_byte(chunk ^ vn2);
        let eq3 = contains_zero_byte(chunk ^ vn3);
        if eq1 || eq2 || eq3 {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

        ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
        debug_assert!(ptr > start_ptr);
        debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
        while ptr <= ptr_sub(end_ptr, USIZE_BYTES) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let chunk = *(ptr as *const usize);
            let eq1 = contains_zero_byte(chunk ^ vn1);
            let eq2 = contains_zero_byte(chunk ^ vn2);
            let eq3 = contains_zero_byte(chunk ^ vn3);
            if eq1 || eq2 || eq3 {
                break;
            }
            ptr = ptr_add(ptr, USIZE_BYTES);
        }
        forward_search(start_ptr, end_ptr, ptr, confirm)
    }
}

/// Return the last index matching the byte `x` in `text`.
pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let confirm = |byte| byte == n1;
    let loop_size = cmp::min(LOOP_SIZE, haystack.len());
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();
    let end_ptr = haystack[haystack.len()..].as_ptr();
    let mut ptr = end_ptr;

    unsafe {
        if haystack.len() < USIZE_BYTES {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

        let chunk = read_unaligned_usize(ptr_sub(ptr, USIZE_BYTES));
        if contains_zero_byte(chunk ^ vn1) {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

        ptr = (end_ptr as usize & !align) as *const u8;
        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
        while loop_size == LOOP_SIZE && ptr >= ptr_add(start_ptr, loop_size) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let a = *(ptr_sub(ptr, 2 * USIZE_BYTES) as *const usize);
            let b = *(ptr_sub(ptr, 1 * USIZE_BYTES) as *const usize);
            let eqa = contains_zero_byte(a ^ vn1);
            let eqb = contains_zero_byte(b ^ vn1);
            if eqa || eqb {
                break;
            }
            ptr = ptr_sub(ptr, loop_size);
        }
        reverse_search(start_ptr, end_ptr, ptr, confirm)
    }
}

/// Like `memrchr`, but searches for two bytes instead of one.
pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let vn2 = repeat_byte(n2);
    let confirm = |byte| byte == n1 || byte == n2;
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();
    let end_ptr = haystack[haystack.len()..].as_ptr();
    let mut ptr = end_ptr;

    unsafe {
        if haystack.len() < USIZE_BYTES {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

        let chunk = read_unaligned_usize(ptr_sub(ptr, USIZE_BYTES));
        let eq1 = contains_zero_byte(chunk ^ vn1);
        let eq2 = contains_zero_byte(chunk ^ vn2);
        if eq1 || eq2 {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

        ptr = (end_ptr as usize & !align) as *const u8;
        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
        while ptr >= ptr_add(start_ptr, USIZE_BYTES) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let chunk = *(ptr_sub(ptr, USIZE_BYTES) as *const usize);
            let eq1 = contains_zero_byte(chunk ^ vn1);
            let eq2 = contains_zero_byte(chunk ^ vn2);
            if eq1 || eq2 {
                break;
            }
            ptr = ptr_sub(ptr, USIZE_BYTES);
        }
        reverse_search(start_ptr, end_ptr, ptr, confirm)
    }
}

/// Like `memrchr`, but searches for three bytes instead of one.
pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let vn2 = repeat_byte(n2);
    let vn3 = repeat_byte(n3);
    let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();
    let end_ptr = haystack[haystack.len()..].as_ptr();
    let mut ptr = end_ptr;

    unsafe {
        if haystack.len() < USIZE_BYTES {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

        let chunk = read_unaligned_usize(ptr_sub(ptr, USIZE_BYTES));
        let eq1 = contains_zero_byte(chunk ^ vn1);
        let eq2 = contains_zero_byte(chunk ^ vn2);
        let eq3 = contains_zero_byte(chunk ^ vn3);
        if eq1 || eq2 || eq3 {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

        ptr = (end_ptr as usize & !align) as *const u8;
        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
        while ptr >= ptr_add(start_ptr, USIZE_BYTES) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let chunk = *(ptr_sub(ptr, USIZE_BYTES) as *const usize);
            let eq1 = contains_zero_byte(chunk ^ vn1);
            let eq2 = contains_zero_byte(chunk ^ vn2);
            let eq3 = contains_zero_byte(chunk ^ vn3);
            if eq1 || eq2 || eq3 {
                break;
            }
            ptr = ptr_sub(ptr, USIZE_BYTES);
        }
        reverse_search(start_ptr, end_ptr, ptr, confirm)
    }
}

#[inline(always)]
unsafe fn forward_search<F: Fn(u8) -> bool>(
    start_ptr: *const u8,
    end_ptr: *const u8,
    mut ptr: *const u8,
    confirm: F,
) -> Option<usize> {
    debug_assert!(start_ptr <= ptr);
    debug_assert!(ptr <= end_ptr);

    while ptr < end_ptr {
        if confirm(*ptr) {
            return Some(sub(ptr, start_ptr));
        }
        ptr = ptr.offset(1);
    }
    None
}

#[inline(always)]
unsafe fn reverse_search<F: Fn(u8) -> bool>(
    start_ptr: *const u8,
    end_ptr: *const u8,
    mut ptr: *const u8,
    confirm: F,
) -> Option<usize> {
    debug_assert!(start_ptr <= ptr);
    debug_assert!(ptr <= end_ptr);

    while ptr > start_ptr {
        ptr = ptr.offset(-1);
        if confirm(*ptr) {
            return Some(sub(ptr, start_ptr));
        }
    }
    None
}

/// Increment the given pointer by the given amount.
unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
    debug_assert!(amt < ::core::isize::MAX as usize);
    ptr.offset(amt as isize)
}

/// Decrement the given pointer by the given amount.
unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
    debug_assert!(amt < ::core::isize::MAX as usize);
    ptr.offset((amt as isize).wrapping_neg())
}

unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
    let mut n: usize = 0;
    ptr::copy_nonoverlapping(ptr, &mut n as *mut _ as *mut u8, USIZE_BYTES);
    n
}

/// Subtract `b` from `a` and return the difference. `a` should be greater than
/// or equal to `b`.
fn sub(a: *const u8, b: *const u8) -> usize {
    debug_assert!(a >= b);
    (a as usize) - (b as usize)
}