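// Untitled
//
// Small HTTP client: reads a URL, resolves its host to an IPv4 address,
// requests the page over plain HTTP and prints the hyperlinks it contains.
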
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
#include <netdb.h>
#include <netinet/in.h>
// Running total of hyperlinks printed by extract_hyperlinks(); reported by main().
int hyper_cnt = 0;

// Buffer size in bytes used by main() for socket I/O.
#define BUFFER_SIZE 1024
// TCP port main() connects to.
#define PORT 80
// Split a URL into its host name and its path.
//
// A leading "http://" or "https://" is skipped if present. The characters up
// to (not including) the first '/' are copied into hostname, at most 99 of
// them. The remaining non-whitespace characters, starting with that '/', are
// copied into path, at most 1023 of them.
//
// Parameters:
//   url      - NUL-terminated URL to split.
//   hostname - output buffer, must hold at least 100 bytes.
//   path     - output buffer, must hold at least 1024 bytes.
//
// Both outputs are always left NUL-terminated: a component that is missing
// from the URL (for example the path in "http://example.com") is returned as
// the empty string instead of being left uninitialized.
void extract_hostname_and_path(const char *url, char *hostname, char *path) {
    static const char *const schemes[] = { "http://", "https://" };

    // Start from well-defined outputs; sscanf() leaves untouched any
    // argument whose conversion does not match.
    hostname[0] = '\0';
    path[0] = '\0';

    // Step over the scheme prefix, if any.
    for (size_t i = 0; i < sizeof schemes / sizeof schemes[0]; i++) {
        size_t scheme_len = strlen(schemes[i]);
        if (strncmp(url, schemes[i], scheme_len) == 0) {
            url += scheme_len;
            break;
        }
    }

    // Field widths keep both writes inside the documented buffer sizes.
    sscanf(url, "%99[^/]%1023s", hostname, path);
}

// Resolve hostname to an IPv4 address in dotted-decimal text form.
//
// Parameters:
//   hostname   - host name (or numeric address) handed to getaddrinfo().
//   ip_address - output buffer of at least INET_ADDRSTRLEN bytes; receives
//                the text form of the first address getaddrinfo() returns.
//
// Returns 0 when the lookup succeeded, 1 when getaddrinfo() failed (the
// reason is printed to stderr).
int get_ip_address(const char *hostname, char *ip_address) {
    struct addrinfo criteria;
    struct addrinfo *matches;

    // Only IPv4 stream endpoints are requested.
    memset(&criteria, 0, sizeof criteria);
    criteria.ai_socktype = SOCK_STREAM;
    criteria.ai_family = AF_INET;

    int rc = getaddrinfo(hostname, NULL, &criteria, &matches);
    if (rc != 0) {
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rc));
        return 1;
    }

    // Only the head of the result list is used.
    if (matches != NULL) {
        struct sockaddr_in *endpoint = (struct sockaddr_in *)matches->ai_addr;
        inet_ntop(matches->ai_family, &endpoint->sin_addr, ip_address, INET_ADDRSTRLEN);
    }

    freeaddrinfo(matches);
    return 0;
}

// Print the href target of every anchor found in an HTML document.
//
// The scan is a plain substring search: each occurrence of "<a" is paired
// with the next "</a>", and the first href="..." that lies between them is
// printed on its own line. Scanning stops at the first "<a" that has no
// closing "</a>". Only double-quoted href values are recognised.
//
// Parameters:
//   html - NUL-terminated document text.
//
// Side effects: writes one line per link to stdout and increments the global
// hyper_cnt once per link printed.
void extract_hyperlinks(const char *html) {
    static const char href_attr[] = "href=\"";
    const char *cursor = html;
    const char *tag;

    while ((tag = strstr(cursor, "<a")) != NULL) {
        const char *closing = strstr(tag, "</a>");
        if (closing == NULL) {
            break;  // No closing </a> tag found
        }

        const char *value = strstr(tag, href_attr);
        if (value != NULL && value < closing) {
            value += sizeof href_attr - 1;  // Skip past href=" to the URL itself
            const char *quote = strchr(value, '"');
            if (quote != NULL && quote < closing) {
                // Write the URL straight from the document: the length comes
                // from untrusted input, so it must not size a stack array.
                fwrite(value, 1, (size_t)(quote - value), stdout);
                putchar('\n');
                hyper_cnt++;
            }
        }

        // Resume just past the closing tag ("</a>" is 4 characters).
        cursor = closing + 4;
    }
}

// Program entry point.
//
// Reads a URL from stdin, splits it into host and path, resolves the host to
// an IPv4 address, sends an HTTP/1.1 GET request (Connection: close) to PORT,
// reads the whole response and prints every hyperlink found in it followed by
// the total count.
//
// Returns 0 after a completed exchange (a recv() error is reported but the
// data received so far is still scanned). Exits with EXIT_FAILURE when the
// URL cannot be read, the host cannot be resolved, the request does not fit
// its buffer, memory runs out, or the socket cannot be created, connected or
// written to.
int main(void) {
    char URL[1024];
    printf("Please enter the URL:");
    puts("");
    // The field width keeps the read inside URL; also catches EOF.
    if (scanf("%1023s", URL) != 1) {
        fprintf(stderr, "Failed to read a URL\n");
        return EXIT_FAILURE;
    }

    // Pre-cleared so they are valid strings even if a component is missing.
    char hostname[1024] = "";
    char path[1024] = "";
    extract_hostname_and_path(URL, hostname, path);
    printf("Hostname: %s\n", hostname);
    printf("Path: %s\n", path);

    char ip_address[INET_ADDRSTRLEN] = "";
    if (get_ip_address(hostname, ip_address) != 0) {
        return EXIT_FAILURE;  // get_ip_address() already reported the reason
    }
    printf("%s\n", ip_address);
    printf("========== Socket ==========\n");

    // Large enough for the longest host and path plus the fixed request text.
    char message[sizeof hostname + sizeof path + 64];
    int request_len = snprintf(message, sizeof(message),
                               "GET %s%s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n",
                               path[0] != '/' ? "/" : "", path, hostname);
    if (request_len < 0 || (size_t)request_len >= sizeof(message)) {
        fprintf(stderr, "HTTP request does not fit in the request buffer\n");
        return EXIT_FAILURE;
    }
    printf("%s\n", message);

    struct sockaddr_in server_addr;
    memset(&server_addr, 0, sizeof server_addr);
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons(PORT);
    if (inet_pton(AF_INET, ip_address, &server_addr.sin_addr) != 1) {
        fprintf(stderr, "Invalid IPv4 address: %s\n", ip_address);
        return EXIT_FAILURE;
    }

    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1) {
        perror("socket()");
        exit(EXIT_FAILURE);
    }

    if (connect(sockfd, (struct sockaddr *)&server_addr, sizeof(server_addr)) == -1) {
        perror("connect()");
        fprintf(stderr, "Please start the server first\n");
        close(sockfd);
        exit(EXIT_FAILURE);
    }

    printf("Sending HTTP request\n");
    // send() may accept fewer bytes than asked; keep going until all are out.
    size_t to_send = (size_t)request_len;
    size_t sent = 0;
    while (sent < to_send) {
        ssize_t written = send(sockfd, message + sent, to_send - sent, 0);
        if (written <= 0) {
            perror("send()");
            close(sockfd);
            exit(EXIT_FAILURE);
        }
        sent += (size_t)written;
    }

    printf("Recieving the response\n");
    printf("======== Hyperlinks ========\n");

    // The response is accumulated in a heap buffer that grows on demand, so
    // its size is limited neither by the stack nor by a fixed cap.
    size_t capacity = 64 * 1024;
    size_t length = 0;
    char *response = malloc(capacity);
    if (response == NULL) {
        perror("malloc()");
        close(sockfd);
        exit(EXIT_FAILURE);
    }

    char chunk[BUFFER_SIZE];
    ssize_t bytes_received;
    while ((bytes_received = recv(sockfd, chunk, sizeof chunk, 0)) > 0) {
        size_t got = (size_t)bytes_received;

        // Keep room for the chunk plus the terminating NUL. A chunk is never
        // larger than the current capacity, so doubling is always enough.
        if (length + got + 1 > capacity) {
            size_t grown = capacity * 2;
            char *bigger = realloc(response, grown);
            if (bigger == NULL) {
                perror("realloc()");
                free(response);
                close(sockfd);
                exit(EXIT_FAILURE);
            }
            response = bigger;
            capacity = grown;
        }

        // A NUL byte in the payload would end the C string early and hide the
        // rest of the document from the scanner; store a space instead.
        for (size_t i = 0; i < got; i++) {
            response[length + i] = (chunk[i] != '\0') ? chunk[i] : ' ';
        }
        length += got;
    }
    response[length] = '\0';

    extract_hyperlinks(response);

    if (bytes_received < 0) {
        perror("recv()");
    }
    printf("============================\n");
    printf("We have found %d hyperlinks\n", hyper_cnt);

    free(response);
    close(sockfd);
    return 0;
}

// Raw Text