martedì 26 aprile 2011

using System;
using System.Collections.Generic;
using System.Net;
using System.Text;
using HtmlAgilityPack;
using System.Drawing;
using System.Text.RegularExpressions;

/// <summary>
/// Small helper that fetches page sources and images over HTTP.
/// </summary>
class Downloader
{
    /// <summary>
    /// Downloads the resource at <paramref name="url"/> and returns it as text.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>
    /// The response body decoded as ASCII; any non-ASCII byte is replaced by '?'.
    /// </returns>
    /// <remarks>
    /// The method pauses for one second after every download
    /// (presumably to avoid hammering the server - confirm before removing).
    /// </remarks>
    public string GetSource(string url)
    {
        // Announce the download before it starts so the log reflects progress.
        Console.WriteLine("get: downloading {0}", url);
        Console.WriteLine();

        byte[] response;
        // WebClient is IDisposable; release it as soon as the bytes are in memory.
        using (WebClient client = new WebClient())
        {
            response = client.DownloadData(url);
        }

        System.Threading.Thread.Sleep(1000);
        return Encoding.ASCII.GetString(response);
    }

    /// <summary>
    /// Downloads the image at <paramref name="picUri"/> and saves it to disk.
    /// </summary>
    /// <param name="picUri">Address of the image to download.</param>
    /// <param name="sOutput">Path of the file the image is written to.</param>
    public void GetImage(string picUri, string sOutput)
    {
        Console.WriteLine("wget: downloading {0}", picUri);
        Console.WriteLine();

        WebRequest requestPic = WebRequest.Create(picUri);

        // The stream must stay open for as long as the Image is in use, so the
        // image is saved inside the using blocks and everything is released after.
        using (WebResponse responsePic = requestPic.GetResponse())
        using (var picStream = responsePic.GetResponseStream())
        using (Image webImage = Image.FromStream(picStream))
        {
            webImage.Save(sOutput);
        }
    }
}
/// <summary>
/// Extracts link and image addresses from an HTML document.
/// </summary>
class HtmlManager
{
    /// <summary>
    /// Collects the <c>href</c> value of every anchor whose value contains
    /// <paramref name="sFilter"/>.
    /// </summary>
    /// <param name="sHTML">HTML source to parse.</param>
    /// <param name="sFilter">Substring an href must contain to be kept.</param>
    /// <returns>The matching href values in document order; empty when none match.</returns>
    public List<string> ExtractAllAHrefTags(string sHTML, string sFilter)
    {
        HtmlDocument hd = new HtmlDocument();
        hd.LoadHtml(sHTML);

        List<string> hrefTags = new List<string>();

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var links = hd.DocumentNode.SelectNodes("//a[@href]");
        if (links == null)
        {
            return hrefTags;
        }

        foreach (HtmlNode link in links)
        {
            HtmlAttribute att = link.Attributes["href"];
            if (att.Value.Contains(sFilter))
            {
                hrefTags.Add(att.Value);
            }
        }
        return hrefTags;
    }

    /// <summary>
    /// Collects the <c>src</c> value of every image that contains at least one
    /// of the wanted tokens and none of the unwanted tokens.
    /// </summary>
    /// <param name="sHTML">HTML source to parse.</param>
    /// <param name="sFilter">
    /// Space-separated tokens; a src is kept only if it contains at least one.
    /// When no token is given, every src passes this check.
    /// </param>
    /// <param name="sNot">
    /// Space-separated tokens; a src containing any of them is rejected.
    /// When no token is given, nothing is rejected.
    /// </param>
    /// <returns>The matching src values in document order; empty when none match.</returns>
    public List<string> ExtractAllSrcTags(string sHTML, string sFilter, string sNot)
    {
        HtmlDocument hd = new HtmlDocument();
        hd.LoadHtml(sHTML);

        List<string> srcTags = new List<string>();

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var images = hd.DocumentNode.SelectNodes("//img[@src]");
        if (images == null)
        {
            return srcTags;
        }

        // Split once, outside the loop. Empty entries are dropped because every
        // string contains "", which would make an empty exclusion list reject
        // every image.
        char[] separator = new[] { ' ' };
        string[] wanted = sFilter.Split(separator, StringSplitOptions.RemoveEmptyEntries);
        string[] rejected = sNot.Split(separator, StringSplitOptions.RemoveEmptyEntries);

        foreach (HtmlNode image in images)
        {
            string src = image.Attributes["src"].Value;

            bool isWanted = wanted.Length == 0 || ContainsAny(src, wanted);
            if (isWanted && !ContainsAny(src, rejected))
            {
                srcTags.Add(src);
            }
        }
        return srcTags;
    }

    // Returns true when value contains at least one of the given tokens.
    private static bool ContainsAny(string value, string[] tokens)
    {
        foreach (string token in tokens)
        {
            if (value.Contains(token))
            {
                return true;
            }
        }
        return false;
    }
}
/// <summary>
/// Console entry point: walks every index page of a gallery, collects the
/// links to the individual image pages and downloads the images they show.
/// </summary>
class CSharpHttpClient
{
    // Gallery used when no address is passed on the command line.
    private const string DefaultAddress = @"http://g.e-hentai.org/g/359039/15c99e73b9/";

    // Number of images per index page used by the page-count arithmetic
    // (assumed from the original code - confirm against the site).
    private const int ImagesPerPage = 20;

    /// <summary>
    /// Runs the download.
    /// </summary>
    /// <param name="args">
    /// Optional: <c>args[0]</c> is the gallery address. When omitted, the
    /// built-in default address is used.
    /// </param>
    static void Main(string[] args)
    {
        Downloader dlr = new Downloader();
        string address = args.Length > 0 ? args[0] : DefaultAddress;
        string source = dlr.GetSource(address);

        // The gallery page states the total as "... of N images"; read N from it.
        Match countMatch = Regex.Match(source, "of (\\d+) images");
        int imageCount;
        if (!countMatch.Success || !int.TryParse(countMatch.Groups[1].Value, out imageCount))
        {
            Console.WriteLine("error: could not find the image count in {0}", address);
            return;
        }

        // Round up so a partially filled last page is still visited.
        int nrpages = (imageCount + ImagesPerPage - 1) / ImagesPerPage;

        HtmlManager hm = new HtmlManager();
        List<string> sFullImageList = new List<string>();

        for (int i = 0; i < nrpages; i++) // for each index page
        {
            // The page index is a query parameter, so it must follow a '?';
            // without it "p=N" is glued onto the path.
            source = dlr.GetSource(string.Concat(address, "?p=", i));

            // Keep only the links that point to single-image pages.
            sFullImageList.AddRange(hm.ExtractAllAHrefTags(source, @"http://g.e-hentai.org/s/"));
        }

        // Image.Save fails when the target directory is missing; CreateDirectory
        // is a no-op when it already exists.
        string outputDirectory = "c:\\Dls\\";
        System.IO.Directory.CreateDirectory(outputDirectory);

        foreach (string link in sFullImageList) // for each image page
        {
            source = dlr.GetSource(link);

            // Keep srcs containing "/h/" or "image.php", minus those containing
            // any of the excluded tokens.
            var sImagelinks = hm.ExtractAllSrcTags(source, "/h/ image.php", "q.lol S.bbq p.afk O.ffs");
            foreach (string sImageLink in sImagelinks)
            {
                // The file name is the current tick count, always with a .jpg extension.
                dlr.GetImage(sImageLink, string.Concat(outputDirectory, DateTime.Now.Ticks.ToString(), ".jpg"));
            }
        }
        Console.WriteLine("done! :)");
    }
}

3 commenti: