程序员人生网站导航

Tinyhttp源码分析

栏目：php教程时间：2016-06-08 08:38:05

Tinyhttp源码分析

简介

Tinyhttp是一个轻量型HTTP Server，使用C语言开发，全部代码只有500多行，还包括一个简单的Client。

源码剖析

Tinyhttp程序的逻辑为：一个无限循环，每收到一个请求就创建一个线程，之后由线程函数处理该请求：先解析HTTP请求，做一些判断，然后判断文件是否可执行；不可执行，则打开文件，输出给客户端（浏览器）；可执行，则创建管道，父子进程通过管道进行通信。其整体处理流程如下：

每个函数的作用如下：

// accept_request: handles one HTTP request received on a connected socket; most of the server's request-processing flow lives here.
void accept_request(void *);
// bad_request: tells the client the request was malformed (HTTP status 400 Bad Request).
void bad_request(int);
// cat: reads a file on the server and writes it to the socket.
void cat(int, FILE *);
// cannot_execute: handles errors that occur while running a CGI program.
void cannot_execute(int);
// error_die: reports the error message via perror and exits.
void error_die(const char *);
// execute_cgi: runs a CGI program; this is the main workhorse function.
void execute_cgi(int, const char *, const char *, const char *);
// get_line: reads one line from the socket, normalising CR / CRLF endings to a single newline.
int get_line(int, char *, int);
// headers: writes the HTTP response headers to the socket.
void headers(int, const char *);
// not_found: handles the case where the requested file cannot be found.
void not_found(int);
// serve_file: uses cat to send a server file back to the browser.
void serve_file(int, const char *);
// startup: initialises the httpd service: creates the socket, binds the port and starts listening.
int startup(u_short *);
// unimplemented: tells the browser that the HTTP method used by the request is not supported.
void unimplemented(int);

分析其程序，流程为：main()——>startup()——>accept_request()——>execute_cgi()等。

核心函数

1）main()函数

// Thread entry point with the signature pthread_create() requires.
// Takes ownership of the heap-allocated descriptor handed over by main(),
// copies it to the thread's own stack, frees the allocation and runs the
// request handler. The handler still receives a pointer to an int.
static void *accept_request_thread(void *arg)
{
    int client = *(int *)arg;

    free(arg);
    accept_request(&client);
    return NULL;
}

// Server entry point: starts the listening socket, then loops forever
// accepting connections and handing each one to its own detached thread.
int main(void)
{
    int server_sock = -1;
    u_short port = 4000;
    struct sockaddr_in client_name;
    pthread_t newthread;

    // Create the listening socket and start the httpd service on the port
    server_sock = startup(&port);
    printf("httpd running on port %d\n", port);

    while (1)
    {
        // accept() treats the length as in/out, so reset it on every pass
        socklen_t client_name_len = sizeof(client_name);
        int client_sock;
        int *client_arg;
        int rc;

        // Blocks until a client connects, then returns the connected socket
        client_sock = accept(server_sock,
                (struct sockaddr *)&client_name,
                &client_name_len);
        if (client_sock == -1)
            error_die("accept");

        // Give every thread its own copy of the descriptor. Passing the
        // address of a variable that the next accept() overwrites is a race.
        client_arg = malloc(sizeof *client_arg);
        if (client_arg == NULL)
        {
            perror("malloc");
            close(client_sock);
            continue;
        }
        *client_arg = client_sock;

        rc = pthread_create(&newthread, NULL, accept_request_thread, client_arg);
        if (rc != 0)
        {
            // pthread_create() returns the error number; it does not set errno
            fprintf(stderr, "pthread_create: %s\n", strerror(rc));
            free(client_arg);
            close(client_sock);
            continue;
        }
        // Nobody joins the workers, so detach them to release their resources
        pthread_detach(newthread);
    }
    // Only reached if the loop is ever left; release the listening socket
    close(server_sock);

    return(0);
}

2）startup()函数

// startup: performs the usual TCP server sequence socket() -> bind() -> listen().
// *port is the port to listen on; if it is 0 the system picks a free port and
// the chosen port number is written back through the pointer.
// Returns the listening socket descriptor. Any failure is fatal (error_die).
int startup(u_short *port)
{
    int httpd = 0;
    int on = 1;
    struct sockaddr_in name;

    // Create the TCP socket
    httpd = socket(PF_INET, SOCK_STREAM, 0);
    if (httpd == -1)
        error_die("socket");
    // Fill in the address: any local interface, requested port
    memset(&name, 0, sizeof(name));
    name.sin_family = AF_INET;
    name.sin_port = htons(*port);
    name.sin_addr.s_addr = htonl(INADDR_ANY);
    // Allow an immediate restart while old connections linger in TIME_WAIT;
    // without this bind() fails with EADDRINUSE after a quick restart
    if (setsockopt(httpd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
        error_die("setsockopt failed");
    // Bind the socket to the port
    if (bind(httpd, (struct sockaddr *)&name, sizeof(name)) < 0)
        error_die("bind");
    // Port 0 means the port was allocated dynamically; ask which one we got
    if (*port == 0)  /* if dynamically allocating a port */
    {
        socklen_t namelen = sizeof(name);
        // getsockname() returns the local address the socket is bound to
        if (getsockname(httpd, (struct sockaddr *)&name, &namelen) == -1)
            error_die("getsockname");
        *port = ntohs(name.sin_port);
    }
    // Start listening with a backlog of 5 pending connections
    if (listen(httpd, 5) < 0)
        error_die("listen");
    // Hand the listening descriptor back to the caller
    return(httpd);
}

3）accept_request()函数

// Thread handler: processes one HTTP request on a connected socket.
// arg points to an int holding the client socket descriptor.
// Parses the request line (method and URL), maps the URL to a file under
// htdocs/, then either serves the file or runs it as a CGI program.
// The client socket is closed on every path before returning.
void accept_request(void *arg)
{
    int client = *(int*)arg;
    char buf[1024];       // line buffer for the request
    size_t numchars;      // number of characters in the current line
    char method[255];     // HTTP method name
    char url[255];
    char path[512];
    size_t i, j;
    size_t len;
    struct stat st;
    int found;
    int cgi = 0;      /* becomes true if server decides this is a CGI
                       * program */
    char *query_string = NULL;

    // A request line has three space-separated fields: method, URL and
    // protocol version, e.g. "GET /index.html HTTP/1.1".
    numchars = get_line(client, buf, sizeof(buf));
    i = 0; j = 0;

    // Copy the method; stay inside the characters actually read
    while ((i < numchars) && !ISspace(buf[i]) && (i < sizeof(method) - 1))
    {
        method[i] = buf[i];
        i++;
    }
    j = i;
    method[i] = '\0';

    // Only GET and POST are supported
    if (strcasecmp(method, "GET") && strcasecmp(method, "POST"))
    {
        unimplemented(client);
        // Close here too, otherwise the descriptor leaks on this path
        close(client);
        return;
    }

    // POST requests are always handled by CGI
    if (strcasecmp(method, "POST") == 0)
        cgi = 1;

    // Extract the URL (including any '?' and what follows it)
    i = 0;
    // Skip whitespace; check the bound before reading the character
    while ((j < numchars) && ISspace(buf[j]))
        j++;
    while ((j < numchars) && !ISspace(buf[j]) && (i < sizeof(url) - 1))
    {
        url[i] = buf[j];
        i++; j++;
    }
    url[i] = '\0';

    // For GET, parameters follow a '?' in the URL. Split them off into
    // query_string and treat the request as CGI.
    if (strcasecmp(method, "GET") == 0)
    {
        query_string = url;
        while ((*query_string != '?') && (*query_string != '\0'))
            query_string++;
        if (*query_string == '?')
        {
            cgi = 1;
            // Terminate the path part; query_string now points at the parameters
            *query_string = '\0';
            query_string++;
        }
    }

    // The URL comes from the network. Refuse anything that is not rooted at
    // '/' or that contains "..", which could escape the htdocs directory.
    // (Conservative: also rejects names that merely contain two dots.)
    if ((url[0] != '/') || (strstr(url, "..") != NULL))
    {
        while ((numchars > 0) && strcmp("\n", buf))  /* read & discard headers */
            numchars = get_line(client, buf, sizeof(buf));
        bad_request(client);
        close(client);
        return;
    }

    // Build the file path under the document root "htdocs". The URL starts
    // with '/', so no extra separator is needed.
    snprintf(path, sizeof(path), "htdocs%s", url);
    // A path ending in '/' refers to that directory's index.html
    len = strlen(path);
    if (path[len - 1] == '/')
        snprintf(path + len, sizeof(path) - len, "index.html");

    found = (stat(path, &st) != -1);
    // If the path is a directory, serve its index.html instead, and stat the
    // new path so the permission test below looks at the file itself rather
    // than at the directory's execute (search) bits
    if (found && ((st.st_mode & S_IFMT) == S_IFDIR))
    {
        len = strlen(path);
        snprintf(path + len, sizeof(path) - len, "/index.html");
        found = (stat(path, &st) != -1);
    }

    if (!found) {
        // Drain the remaining request headers, then answer 404
        while ((numchars > 0) && strcmp("\n", buf))  /* read & discard headers */
            numchars = get_line(client, buf, sizeof(buf));
        not_found(client);
    }
    else
    {
        // Any execute permission bit marks the file as a CGI program:
        //   S_IXUSR: owner may execute
        //   S_IXGRP: group may execute
        //   S_IXOTH: others may execute
        if ((st.st_mode & S_IXUSR) ||
                (st.st_mode & S_IXGRP) ||
                (st.st_mode & S_IXOTH)    )
            cgi = 1;
        // Plain files are sent as-is, CGI programs are executed
        if (!cgi)
            serve_file(client, path);
        else
            execute_cgi(client, path, method, query_string);
    }

    // One request per connection: close the client socket
    close(client);
}

4）execute_cgi()函数

此函数执行流程如下：

// Runs the CGI program at `path` and relays its output to the client.
//   client       - connected client socket
//   path         - file system path of the CGI program
//   method       - HTTP method of the request ("GET" or "POST")
//   query_string - text after '?' in the URL (used for GET)
// For GET the remaining request headers are discarded. For POST they are
// scanned for Content-Length and the body is piped to the program's stdin.
// Replies 400 when a POST carries no valid Content-Length, and calls
// cannot_execute() when the pipes or the child process cannot be created.
void execute_cgi(int client, const char *path,
        const char *method, const char *query_string)
{
    char buf[1024];
    int cgi_output[2];
    int cgi_input[2];
    pid_t pid;
    int status;
    int i;
    char c;
    int numchars = 1;
    int content_length = -1;

    // Seed buf so the header-discard loop below runs at least once
    buf[0] = 'A'; buf[1] = '\0';
    if (strcasecmp(method, "GET") == 0)
    {
        // GET: the remaining headers are not needed
        while ((numchars > 0) && strcmp("\n", buf))  /* read & discard headers */
            numchars = get_line(client, buf, sizeof(buf));
    }
    else if (strcasecmp(method, "POST") == 0) /*POST*/
    {
        // POST: look for the Content-Length header among the request headers
        numchars = get_line(client, buf, sizeof(buf));
        while ((numchars > 0) && strcmp("\n", buf))
        {
            // Only lines long enough to hold "Content-Length:" can match.
            // Cut the line after the header name to compare it.
            if (numchars > 15)
            {
                buf[15] = '\0';
                if (strcasecmp(buf, "Content-Length:") == 0)
                    content_length = atoi(&(buf[16]));
            }
            numchars = get_line(client, buf, sizeof(buf));
        }
        // A missing or negative length is a malformed request
        if (content_length < 0) {
            bad_request(client);
            return;
        }
    }
    else/*HEAD or other*/
    {
    }

    // cgi_output carries the program's stdout back to the server
    if (pipe(cgi_output) < 0) {
        cannot_execute(client);
        return;
    }
    // cgi_input feeds the request body to the program's stdin
    if (pipe(cgi_input) < 0) {
        // Do not leak the first pipe
        close(cgi_output[0]);
        close(cgi_output[1]);
        cannot_execute(client);
        return;
    }

    if ( (pid = fork()) < 0 ) {
        // Do not leak either pipe
        close(cgi_output[0]);
        close(cgi_output[1]);
        close(cgi_input[0]);
        close(cgi_input[1]);
        cannot_execute(client);
        return;
    }

    if (pid == 0)  /* child: CGI script */
    {
        // putenv() keeps pointers to these buffers, so they must stay alive
        // until exec. query_env is sized for the prefix plus the longest
        // query string the caller can pass.
        char meth_env[255];
        char query_env[512];
        char length_env[255];

        // Wire the pipes to the program's standard streams
        dup2(cgi_output[1], STDOUT);   // stdout -> write end of cgi_output
        dup2(cgi_input[0], STDIN);     // stdin  <- read end of cgi_input
        // The child does not use the parent's ends of the pipes
        close(cgi_output[0]);
        close(cgi_input[1]);

        // Basic CGI environment: request method plus query string or body length
        snprintf(meth_env, sizeof(meth_env), "REQUEST_METHOD=%s", method);
        putenv(meth_env);
        if (strcasecmp(method, "GET") == 0) {
            snprintf(query_env, sizeof(query_env), "QUERY_STRING=%s",
                    query_string != NULL ? query_string : "");
            putenv(query_env);
        }
        else {   /* POST */
            snprintf(length_env, sizeof(length_env), "CONTENT_LENGTH=%d", content_length);
            putenv(length_env);
        }

        // argv[0] is the program path; the list ends with a typed null pointer
        execl(path, path, (char *)NULL);
        // Only reached when exec failed. _exit() avoids flushing stdio buffers
        // inherited from the server into the pipe.
        _exit(127);
    } else {    /* parent */
        // The parent does not use the child's ends of the pipes
        close(cgi_output[1]);
        close(cgi_input[0]);

        // Send the status line once, from the parent only, before relaying
        sprintf(buf, "HTTP/1.0 200 OK\r\n");
        send(client, buf, strlen(buf), 0);

        // POST: forward the request body to the program's stdin
        if (strcasecmp(method, "POST") == 0)
            for (i = 0; i < content_length; i++) {
                // Stop if the client goes away or the program stops reading
                if (recv(client, &c, 1, 0) <= 0)
                    break;
                if (write(cgi_input[1], &c, 1) != 1)
                    break;
            }
        // Close the write end now so a program that reads stdin until EOF
        // can finish instead of waiting forever
        close(cgi_input[1]);

        // Relay everything the program prints back to the client
        while (read(cgi_output[0], &c, 1) > 0)
            send(client, &c, 1, 0);

        close(cgi_output[0]);
        // Reap the child so it does not stay a zombie
        waitpid(pid, &status, 0);
    }
}

参考文献

http://armsword.com/2014/10/29/tinyhttpd-code-analyse/

http://blog.csdn.net/jcjc918/article/details/42129311

http://techlog.cn/article/list/10182680

------分隔线----------------------------

上一篇 随机采样和随机模拟：吉布斯采样Gibbs Sampling实现文档分类

下一篇 Bootstrap UI 编辑器哪家强？推荐以下最好的15款【快速GUI利器，一般人我不告诉他！】

------分隔线----------------------------