C++: как получить исходный HTML-код страницы
Я хотел бы знать, как я могу загрузить исходный HTML-код веб-сайта в строку без использования LibCurl. Я искал в Интернете примеры использования Wininet.
Ниже приведен пример кода, который я использовал для Wininet. Как бы я сделал то же самое с помощью Winsock?
#include "stdafx.h"
#include <windows.h>
#include <wininet.h>
#include <iostream>
#include <string>
#include <stdio.h>
#include <stdlib.h>
using namespace std;
#pragma comment ( lib, "Wininet.lib" )
int main()
{
HINTERNET hInternet = InternetOpenA("InetURL/1.0", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
HINTERNET hConnection = InternetConnectA(hInternet, "google.com", 80, " ", " ", INTERNET_SERVICE_HTTP, 0, 0);
HINTERNET hData = HttpOpenRequestA(hConnection, "GET", "/", NULL, NULL, NULL, INTERNET_FLAG_KEEP_CONNECTION, 0);
char buf[2048];
string lol;
HttpSendRequestA(hData, NULL, 0, NULL, 0);
DWORD bytesRead = 0;
DWORD totalBytesRead = 0;
// http://msdn.microsoft.com/en-us/library/aa385103(VS.85).aspx
// To ensure all data is retrieved, an application must continue to call the
// InternetReadFile function until the function returns TRUE and the
// lpdwNumberOfBytesRead parameter equals zero.
while (InternetReadFile(hData, buf, 2000, &bytesRead) && bytesRead != 0)
{
buf[bytesRead] = 0; // insert the null terminator.
puts(buf); // print it to the screen.
lol = lol + buf;
printf("%d bytes read\n", bytesRead);
totalBytesRead += bytesRead;
}
printf("\n\n END -- %d bytes read\n", bytesRead);
printf("\n\n END -- %d TOTAL bytes read\n", totalBytesRead);
InternetCloseHandle(hData);
InternetCloseHandle(hConnection);
InternetCloseHandle(hInternet);
cout << "\nThe beginning." << endl << endl << endl;
cout << lol << endl;
system("PAUSE");
}
Этот пример WinSock работает для сайтов без дополнительных путей. Как мне получить HTML-код страницы, подобной этой: (www.website.com/page)
#include "stdafx.h"
#include <iostream>
#include <winsock2.h>
#include <string>
#include <fstream>
using namespace std;
string get_source()
{
WSADATA WSAData;
WSAStartup(MAKEWORD(2, 0), &WSAData);
SOCKET sock;
SOCKADDR_IN sin;
char buffer[1024];
////////////////This is portion that is confusing me//////////////////////////////////////////////////
string srequete = "GET /id/AeroNX/ HTTP/1.1\r\n";
srequete += "Host: steamcommunity.com\r\n";
srequete += "Connection: close\r\n";
srequete += "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\r\n";
srequete += "Accept-Language: fr,fr-fr;q=0.8,en-us;q=0.5,en;q=0.3\r\n";
srequete += "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n";
srequete += "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.3\r\n";
srequete += "Referer: http://pozzyx.net/\r\n";
srequete += "\r\n";
///////////////////////////////////////////////////////////////////////////////////////////////////////
size_t requete_taille = srequete.size() + 1;
char crequete[5000];
strncpy(crequete, srequete.c_str(), requete_taille);
int i = 0;
string source = "";
sock = socket(AF_INET, SOCK_STREAM, 0);
sin.sin_addr.s_addr = inet_addr("63.228.223.103"); // epguides.com //why wont it work for 72.233.89.200 (whatismyip.com)
sin.sin_family = AF_INET;
sin.sin_port = htons(80); // port HTTP.
connect(sock, (SOCKADDR *)&sin, sizeof(sin)); // on se connecte sur le site web.
send(sock, crequete, strlen(crequete), 0); // why do we send the string??
do
{
i = recv(sock, buffer, sizeof(buffer), 0); // le buffer récupère les données reçues.
source += buffer;
} while (i != 0);
closesocket(sock); // on ferme le socket.
WSACleanup();
return source;
}
// Fetches the page with get_source() and writes the raw response to
// "Buffer.txt" in the current directory.
//
// Returns 0 on success, 1 when the output file cannot be opened.
// (Standard C++ requires main to return int; `void main` is a compiler
// extension.)
int main()
{
    int exitCode = 0;

    ofstream fout;
    fout.open("Buffer.txt");
    if (fout.is_open())
    {
        fout << get_source(); // the string url doesnt matter
        fout.close();
    }
    else
    {
        cerr << "Could not open Buffer.txt for writing" << endl;
        exitCode = 1;
    }

    system("PAUSE");
    return exitCode;
}