搜索算法

搜索算法主要有:

  1. 线性搜索(linear search)
  2. 二分查找(binary search)
  3. 跳跃搜索(jump search)
  4. 插值搜索(interpolation search)
  5. 指数搜索-指数二分搜索(exponential search)
  6. 三分搜索(ternary search)

线性搜索数组无需排序时间复杂度为 O(n)

二分查找(二分搜索、二叉查找)数组需要排序,时间复杂度为 O(log₂n)

跳跃搜索数组需要排序,最优的跳跃块为 √ n,时间复杂度为 O(√ n),时间复杂度在 线性搜索和二分查找之间,二分查找优先选择,只有一种情况可以考虑跳跃搜索,即被搜索的数大多数情况下是最小元素或者比最小元素还要小(其实二分查找可以在开头判断一下即可),所以跳跃搜索大多数场景可被二分查找替换。

插值搜索数组需要排序,是二分查找的优化版,在元素均匀分布的情况下(即数组各个元素之间相差不多比如 {5,10,15,20,25,30}这种情况下效率极高时间复杂度接近于O(1)),时间复杂度为 O(loglogn),在最差的情况可以为 O(n),下面是方程式

// The idea of formula is to return higher value of pos
// when element to be searched is closer to arr[hi]. And
// smaller value when closer to arr[lo]
pos = lo + [ (x-arr[lo])*(hi-lo) / (arr[hi]-arr[Lo]) ]

arr[] ==> Array where elements need to be searched
x     ==> Element to be searched
lo    ==> Starting index in arr[]
hi    ==> Ending index in arr[]

(x-arr[lo]) / (arr[hi]-arr[Lo]) <= 1 即  ( (x-arr[lo]) / (arr[hi]-arr[Lo]) ) * (hi - lo) <= hi - lo


指数搜索数组需要排序,思想为以1开始,跟目标数值对比,然后继续2,然后是4,然后是8...直到确定要查找的数值在 i/2 和 i 之间,然后在 i/2 和 i 之间利用二分查找来确定。两种情况下比二分查找更适用:

1.被搜索的数组的元素个数非常大,接近于无穷,比如求方程式 f(x) = x^2 - 10*x - 20 当f(x)返回值为正数时x的最小值。

2.搜索的值里跟数组的第一个元素很近。


三分搜索数组需要排序,每次判断前把数组切成三段,然后取其中匹配的那一段继续重复操作,直到最后匹配

三分搜索与二分搜索对比:

假设二分搜索的方程式为 T(n) = T(n/2) + 2 这里的 2 代表每次调用执行的指令数,所以时间复杂度为 O(2log₂n) 而三分搜索每次都要比二分搜索多一次切分,多一次判断

所以三分搜索的方程式可以表示为 T(n) = T(n/3) + 4,时间复杂度可以表示为 O(4log₃n),所以我们只要对比 log₂n 和 2log₃n( (2/log₂3) * log₂n ),因为 2/log₂3) > 1,所以 2log₃n > log₂n。所以二分搜索相对优于三分搜索。


二分查找

二分超找为最常用的算法,搜索数组必须为有序的,时间复杂度为 T(n) = T(n/2) + c 即 O(log₂n)

下面是递归法二分超找

#include <stdio.h>

int b_search(int *arr, int cur, int end, int x)
{
    int     b;

    printf("cur:%d end:%d - %d\n", cur, end, x);

    // 如果只相差1
    if (cur == end - 1) {
        if (arr[cur] == x)
            return cur;
        else if(arr[end] == x)
            return end;
        else
            return -1;
    }

    b = (cur + end) / 2;

    if (arr[b] > x) {
        end = b;
    } else if(arr[b] < x) {
        cur = b;
    } else {
        return b;
    }

    return b_search(arr, cur, end, x);

}

int main(int argc, char **argv)
{
    if (argc != 2) {
        printf("./a.out num\n");
        return -1;
    }

    int search_n = atoi(argv[1]);
    int arr[] = {1,4,7,10,20,25,35,100,500,1000};
    int len = sizeof(arr)/sizeof(int);

    int res = b_search(arr, 0, len - 1, search_n);
    
    if (res == -1)
        printf("not found\n");
    else
        printf("%d found at arr[%d]\n", search_n, res);
    return 0;
}

迭代法(优先选择,空间复杂度为O(1))

int b_search(int *arr, int cur, int end, int x)
{
    int     b;

    if (arr[0] > x || arr[end] < x)
        return -1;

    for (;;) {
        printf("cur:%d end:%d - %d\n", cur, end, x);
        // 如果只相差1
        if (cur >= end - 1) {
            if (arr[cur] == x)
                return cur;
            else if(arr[end] == x)
                return end;
            else
                return -1;
        }

        b = (cur + end) / 2;

        if (arr[b] > x) {
            end = b;
        } else if(arr[b] < x) {
            cur = b;
        } else {
            return b;
        }
    }

    return -1;
}


跳跃搜索

#include <stdio.h>
#include <math.h>

#define min(x,y) ((x)>(y)?(y):(x))

int jump_search(int *arr, int len, int x)
{
    int     m,cur,block,prev;

    if (arr[0] > x || arr[len - 1] < x)
        return -1;

    block = sqrt(len);
    cur = block;

    prev = 0;

    //每次跳跃 block 个索引
    while (arr[min(cur,len) - 1] < x) {
        prev = cur;
        cur += block;

        printf("jump to index: %d - %d\n", prev, arr[prev]);
    }

    m = min(cur, len);

    while (arr[prev] < x) {
        prev++;

        if (prev == m)
            return -1;
    }

    if (arr[prev] == x)
        return prev;

    return -1;
}

int main(int argc, char **argv)
{
    if (argc != 2) {
        printf("./a.out num\n");
        return -1;
    }

    int search_n = atoi(argv[1]);
    int arr[] = {1,4,7,10,20,25,35,100,500,1000};
    int len = sizeof(arr)/sizeof(int);

    int res = jump_search(arr, len, search_n);
    
    if (res == -1)
        printf("not found\n");
    else
        printf("%d found at arr[%d]\n", search_n, res);
    return 0;
}


插值搜索

#include <stdio.h>

#define min(x,y) ((x)>(y)?(y):(x))

int interpolation_search(int *arr, int len, int x)
{   
    int low,high,pos;

    if (arr[0] > x || arr[len - 1] < x)
        return -1;

    low = 0;
    high = len - 1;

    for (;;) {
        printf("low:%d high:%d\n", low, high);
        if (low >= high - 1) {
            if (arr[low] == x)
                return low;
            else if (arr[high] == x)
                return high;

            return -1;
        }

        pos = low + ( (x - arr[low]) * (high - low) / (arr[high] - arr[low]) );

        if (arr[pos] < x)
            low = pos + 1;
        else if (arr[pos] > x)
            high = pos - 1;
        else
            return pos;
    }

}

int main(int argc, char **argv)
{
    if (argc != 2) {
        printf("./a.out num\n");
        return -1;
    }

    int search_n = atoi(argv[1]);
    int arr[] = {5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,80};
    int len = sizeof(arr)/sizeof(int);

    int res = interpolation_search(arr, len, search_n);
    
    if (res == -1)
        printf("not found\n");
    else
        printf("%d found at arr[%d]\n", search_n, res);
    return 0;
}


指数搜索

#include <stdio.h>

#define dl_min(x,y) ((x)>(y)?(y):(x))

int b_search(int *arr, int cur, int end, int x)
{
    int     b;

    for (;;) {
        printf("cur:%d end:%d - %d\n", cur, end, x);
        // 如果只相差1
        if (cur >= end - 1) {
            if (arr[cur] == x)
                return cur;
            else if(arr[end] == x)
                return end;
            else
                return -1;
        }

        b = (cur + end) / 2;

        if (arr[b] > x) {
            end = b;
        } else if(arr[b] < x) {
            cur = b;
        } else {
            return b;
        }
    }

    return -1;
}

int exponential_search(int *arr, int len, int x)
{
    if (arr[0] > x || arr[len - 1] < x)
        return -1;

    if (arr[0] == x)
        return 0;

    int     i,n;
    n = len - 1;
    for (i = 1; i < n; i *= 2) {
        if (arr[i] >= x) {
            if (arr[i] == x)
                return i;
            break;
        }
    }

    return b_search(arr, i/2 , dl_min(i,n), x);
}

int main(int argc, char **argv)
{
    if (argc != 2) {
        printf("./a.out num\n");
        return -1;
    }

    int search_n = atoi(argv[1]);
    int arr[] = {1,4,7,10,20,25,35,100,500,1000};
    int len = sizeof(arr)/sizeof(int);

    int res = exponential_search(arr, len, search_n);
    
    if (res == -1)
        printf("not found\n");
    else
        printf("%d found at arr[%d]\n", search_n, res);
    return 0;
}


三分搜索

#include <stdio.h>

#define dl_min(x,y) ((x)>(y)?(y):(x))

// {xxxxx cut_1 xxxxx cut_2 xxxxx}
int t_search(int *arr, int cur, int end, int x)
{
    int     b, cut_1, cut_2;

    if (arr[0] > x || arr[end] < x)
        return -1;

    for (;;) {
        printf("cur:%d end:%d - %d\n", cur, end, x);
        // 如果只相差2
        if (cur >= end - 2) {
            if (cur == end - 2) {
                if (arr[cur+1] == x)
                    return cur+1;
            }
            if (arr[cur] == x)
                return cur;
            else if(arr[end] == x)
                return end;
            else
                return -1;
        }

        cut_1 = cur + (end - cur)/3;
        cut_2 = cut_1 + (end - cur)/3;

        //边缘判断
        if (arr[cut_1] == x)
            return cut_1;
        if (arr[cut_2] == x)
            return cut_2;

        if (x < arr[cut_1]) {
            end = cut_1;
        } else if (x < arr[cut_2]) {
            cur = cut_1;
            end = cut_2;
        } else {
            cur = cut_2;
        }
    }

    return -1;
}

int main(int argc, char **argv)
{
    if (argc != 2) {
        printf("./a.out num\n");
        return -1;
    }

    int search_n = atoi(argv[1]);
    int arr[] = {1,4,7,10,20,25,35,100,500,600,700,800,900,1000,1001,2000};
    int len = sizeof(arr)/sizeof(int);

    int res = t_search(arr, 0, len - 1, search_n);
    
    if (res == -1)
        printf("not found\n");
    else
        printf("%d found at arr[%d]\n", search_n, res);
    return 0;
}



参考

geeksforgeeks


上一篇: 时间复杂度计算
下一篇: 排序算法(1)
作者邮箱: 203328517@qq.com