eBay is one of the largest online marketplaces with millions of active listings at any given time. In this tutorial, we'll walk through how to scrape and extract key data from eBay listings using Objective-C and the HTMLParser library.
Setup
We'll need the following frameworks installed: Foundation (part of the platform SDK) and HTMLParser.
You can install these using CocoaPods by adding this Podfile:
# CocoaPods manifest for the scraper target.
# NOTE(review): the Objective-C snippets in this tutorial use the TFHpple /
# TFHppleElement classes, which are distributed in the 'hpple' pod — confirm
# that the pod named here actually provides them.
platform :ios, '9.0'

target 'MyApp' do
  use_frameworks!

  pod 'HTMLParser'
end
Then run `pod install` from the project directory.
We'll also define the starting eBay URL to scrape and a string for the user agent header to spoof a browser visit:
#import <Foundation/Foundation.h>
#import <HTMLParser/HTMLParser.h>
// Search-results page to scrape. The literal must be a bare URL: '<' and '>'
// (left over from a Markdown autolink) are not legal URL characters, so a
// bracketed string cannot be turned into the intended NSURL.
NSString *urlString = @"https://www.ebay.com/sch/i.html?_nkw=baseball";
// Sent as the User-Agent request header so the request looks like a browser visit.
NSString *userAgent = @"Mozilla/5.0 ...";
Replace the user agent string with your own browser's user agent.
Fetch the Listings Page
We'll use NSURLSession to fetch the HTML content from the eBay URL:
// Build the request for the listings page and attach the browser-like User-Agent.
NSURL *url = [NSURL URLWithString:urlString];
NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url];
[request setValue:userAgent forHTTPHeaderField:@"User-Agent"];

// The completion handler receives the response body, or an error if the request failed.
NSURLSessionDataTask *task = [[NSURLSession sharedSession] dataTaskWithRequest:request completionHandler:^(NSData * _Nullable data, NSURLResponse * _Nullable response, NSError * _Nullable error) {
    // `data` is nullable: stop here rather than handing nil or empty data to the parser.
    if (data.length == 0) {
        NSLog(@"Request failed: %@", error);
        return;
    }

    // The parser consumes the raw bytes directly; no intermediate NSString is needed.
    TFHpple *parser = [TFHpple hppleWithHTMLData:data];
    // Parse HTML
}];
// Nothing is sent until the task is resumed.
[task resume];
The user agent is added to the request headers. We initialize the HTMLParser with the fetched data.
Extract Listing Data
Now we can extract the key data points from each listing. eBay encloses each listing in a `<div>` whose class contains `s-item__info`. We use XPath queries and access child elements to extract the values. Finally, we can print the extracted info. This will output each listing's title, URL, price and other data. Here is the full code to scrape and extract eBay listing data:
Get HTML from any page with a simple API call. We handle proxy rotation, browser identities, automatic retries, CAPTCHAs, JavaScript rendering, etc automatically for you
curl "http://api.proxiesapi.com/?key=API_KEY&url=https://example.com"
<!doctype html>
// Select the info container of every listing on the page, then read the
// fields of each one.
NSArray *listingItems = [parser searchWithXPathQuery:@"//div[contains(@class, 's-item__info')]"];
for (TFHppleElement *element in listingItems) {
    NSString *title = [[element firstChild] content];
    // TFHppleElement's class-based lookup is -firstChildWithClassName:, and it
    // takes an NSString (@"..."), not a C string.
    // NOTE(review): this lookup appears to inspect direct children only — verify
    // against eBay's current markup and switch to an XPath query if the node is
    // nested deeper.
    NSString *url = [[element firstChildWithClassName:@"s-item__link"] objectForKey:@"href"];
    // NOTE(review): hpple's README reads an element's text via -text; confirm
    // that -content returns the text for element nodes in the installed version.
    NSString *price = [[element firstChildWithClassName:@"s-item__price"] content];
    // And so on for other fields like seller, shipping, location etc.
    NSLog(@"%@", title);
    NSLog(@"%@", url);
    NSLog(@"%@", price);
}
Print Results
// Print the fields extracted for one listing, one field per log line.
// NOTE(review): title, url and price are not declared in this snippet; it
// presumably belongs inside the per-listing loop — confirm.
NSLog(@"Title: %@", title);
NSLog(@"URL: %@", url);
NSLog(@"Price: %@", price);
NSLog(@"===================="); // Visual separator between listings
Full Code
#import <Foundation/Foundation.h>
#import <HTMLParser/HTMLParser.h>
// Search-results page to scrape. The literal must be a bare URL: '<' and '>'
// (left over from a Markdown autolink) are not legal URL characters.
NSString *urlString = @"https://www.ebay.com/sch/i.html?_nkw=baseball";
// Sent as the User-Agent request header so the request looks like a browser visit.
NSString *userAgent = @"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36";

// Build the request for the listings page.
NSURL *url = [NSURL URLWithString:urlString];
NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url];
[request setValue:userAgent forHTTPHeaderField:@"User-Agent"];

NSURLSessionDataTask *task = [[NSURLSession sharedSession] dataTaskWithRequest:request completionHandler:^(NSData * _Nullable data, NSURLResponse * _Nullable response, NSError * _Nullable error) {
    // `data` is nullable: stop here rather than handing nil or empty data to the parser.
    if (data.length == 0) {
        NSLog(@"Request failed: %@", error);
        return;
    }

    TFHpple *parser = [TFHpple hppleWithHTMLData:data];
    NSArray *listingItems = [parser searchWithXPathQuery:@"//div[contains(@class, 's-item__info')]"];

    // Returns the content of the child at `index`, or nil when the element has
    // fewer children than that (TFHppleElement has no -childAtIndex:).
    NSString *(^contentOfChild)(TFHppleElement *, NSUInteger) = ^NSString *(TFHppleElement *parent, NSUInteger index) {
        NSArray *children = [parent children];
        if (index >= children.count) {
            return nil;
        }
        TFHppleElement *child = children[index];
        return [child content];
    };

    for (TFHppleElement *element in listingItems) {
        NSString *title = [[element firstChild] content];
        // Named `link` so it does not shadow the outer NSURL `url`.
        // TFHppleElement's class-based lookup is -firstChildWithClassName:, and
        // it takes an NSString (@"..."), not a C string.
        // NOTE(review): this lookup appears to inspect direct children only —
        // verify against eBay's current markup.
        NSString *link = [[element firstChildWithClassName:@"s-item__link"] objectForKey:@"href"];
        NSString *price = [[element firstChildWithClassName:@"s-item__price"] content];

        // NOTE(review): the positional indexes below presumably match eBay's
        // markup at the time of writing — confirm before relying on them.
        NSString *details = contentOfChild(element, 7);
        NSString *sellerInfo = contentOfChild(element, 11);
        NSString *shippingCost = contentOfChild(element, 13);
        NSString *location = contentOfChild(element, 15);
        NSString *sold = contentOfChild(element, 17);

        NSLog(@"Title: %@", title);
        NSLog(@"URL: %@", link);
        NSLog(@"Price: %@", price);
        NSLog(@"Details: %@", details);
        NSLog(@"Seller: %@", sellerInfo);
        NSLog(@"Shipping: %@", shippingCost);
        NSLog(@"Location: %@", location);
        NSLog(@"Sold: %@", sold);
        NSLog(@"====================");
    }
}];
// Nothing is sent until the task is resumed.
[task resume];
Browse by language:
The easiest way to do Web Scraping
Try ProxiesAPI for free
<html>
<head>
<title>Example Domain</title>
<meta charset="utf-8" />
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
...