The Ultimate NSXMLParser Cheatsheet

Oct 31, 2023 · 4 min read

NSXMLParser allows parsing XML documents in Objective-C. It provides SAX style event-driven parsing.

Getting Started

Import:

#import <Foundation/Foundation.h>

Initialize:

// Create a parser over the XML bytes held in `data`.
NSXMLParser *parser = [[NSXMLParser alloc] initWithData:data];

Set delegate:

// Route parsing callbacks to the current object, which should adopt NSXMLParserDelegate.
parser.delegate = self;

Start parsing:

// Runs the parse; the delegate methods are called synchronously from inside this call.
[parser parse];

Delegate Methods

Did start document:

/// Delegate callback sent once, before any element callbacks.
/// @param parser The parser that is beginning the document.
- (void)parserDidStartDocument:(NSXMLParser *)parser {
  // Called when parsing starts
}

Did end document:

/// Delegate callback sent once the document has been parsed to its end.
/// @param parser The parser that finished the document.
- (void)parserDidEndDocument:(NSXMLParser *)parser {
  // Called when parsing ends
}

Did start element:

/// Delegate callback sent when the parser reaches an opening tag.
/// @param parser        The parser reporting the element.
/// @param elementName   Name of the element that was opened.
/// @param namespaceURI  Namespace URI of the element; only populated when
///                      shouldProcessNamespaces is YES.
/// @param qName         Qualified name of the element; only populated when
///                      shouldProcessNamespaces is YES.
/// @param attributeDict Attributes of the opening tag, keyed by attribute name.
- (void)parser:(NSXMLParser *)parser
 didStartElement:(NSString *)elementName
   namespaceURI:(NSString *)namespaceURI
  qualifiedName:(NSString *)qName
     attributes:(NSDictionary *)attributeDict {

  // Element opened
}

Did end element:

/// Delegate callback sent when the parser reaches a closing tag.
/// @param parser       The parser reporting the element.
/// @param elementName  Name of the element that was closed.
/// @param namespaceURI Namespace URI of the element; only populated when
///                     shouldProcessNamespaces is YES.
/// @param qName        Qualified name of the element; only populated when
///                     shouldProcessNamespaces is YES.
- (void)parser:(NSXMLParser *)parser
 didEndElement:(NSString *)elementName
   namespaceURI:(NSString *)namespaceURI
  qualifiedName:(NSString *)qName {

  // Element closed
}

Found characters:

/// Delegate callback sent with text content found inside the current element.
/// The text of a single element may arrive split across several calls, so
/// append each piece to a buffer rather than treating one call as the whole value.
/// @param parser The parser reporting the text.
/// @param string The piece of character data that was found.
- (void)parser:(NSXMLParser *)parser
 foundCharacters:(NSString *)string {

  // Found text
}

Parsing Options

Namespace and external-entity options (NSXMLParser does not validate against a DTD):

// Report namespaceURI / qualifiedName values in the element callbacks.
parser.shouldProcessNamespaces = YES;
// Report the scope of namespace prefix declarations to the delegate.
parser.shouldReportNamespacePrefixes = YES;
// Let the parser resolve external entity declarations.
// NOTE(review): resolving external entities on untrusted XML is a common
// attack vector (XXE) — confirm the input is trusted before enabling this.
parser.shouldResolveExternalEntities = YES;

Progress tracking:

// NOTE(review): parser:parseProgress: is not declared by NSXMLParserDelegate,
// so NSXMLParser will presumably never call this method — confirm against the
// protocol reference. The parser's lineNumber / columnNumber properties are
// the documented way to see how far parsing has advanced.
- (void)parser:(NSXMLParser *)parser
  parseProgress:(NSUInteger)percentDone {

  // percentDone from 0 to 100
}

Validation

Check errors:

// Read the parser's recorded error and branch only when one is present.
NSError *error = parser.parserError;
if (error != nil) {
  // handle error
}

Tips

  • Set shouldProcessNamespaces for namespaces
  • Delegate methods called synchronously
  • Register prefixes to resolve namespaces
  • Cache and reuse parser for performance
  • Examples

    Simple parsing:

    /// Minimal NSXMLParser delegate that logs the name of every element it sees open.
    @interface ParserDelegate : NSObject <NSXMLParserDelegate>
    @end
    
    @implementation ParserDelegate
    
    /// Logs each opening tag.
    /// The parser only sends the full five-part selector declared by
    /// NSXMLParserDelegate; a shortened parser:didStartElement: is never called.
    /// @param parser        The parser reporting the element.
    /// @param elementName   Name of the element that was opened.
    /// @param namespaceURI  Namespace URI (set only when namespace processing is on).
    /// @param qName         Qualified name (set only when namespace processing is on).
    /// @param attributeDict Attributes of the opening tag, keyed by name.
    - (void)parser:(NSXMLParser *)parser
     didStartElement:(NSString *)elementName
        namespaceURI:(NSString *)namespaceURI
       qualifiedName:(NSString *)qName
          attributes:(NSDictionary<NSString *, NSString *> *)attributeDict {
      NSLog(@"Started element: %@", elementName);
    }
    
    @end
    
    NSData *data = ...; // xml data
    
    NSXMLParser *parser = [[NSXMLParser alloc] initWithData:data];
    // Keep the delegate in a local variable so it stays alive for the whole parse.
    ParserDelegate *delegate = [[ParserDelegate alloc] init];
    parser.delegate = delegate;
    
    // -parse returns NO when parsing fails or is aborted; report the reason
    // instead of silently ignoring it.
    if (![parser parse]) {
      NSLog(@"Parse failed: %@", parser.parserError);
    }
    

    Extract text:

    // Buffer for the character data of the element currently being parsed.
    NSMutableString *text;
    
    // Start a fresh buffer whenever an element opens.
    // The full NSXMLParserDelegate selector is required; the parser never
    // calls a shortened parser:didStartElement:.
    // Note: the buffer is reset on every opening tag, so text that precedes a
    // nested child element is discarded when the child opens.
    - (void)parser:(NSXMLParser *)parser
     didStartElement:(NSString *)elementName
        namespaceURI:(NSString *)namespaceURI
       qualifiedName:(NSString *)qName
          attributes:(NSDictionary<NSString *, NSString *> *)attributeDict {
      text = [[NSMutableString alloc] init];
    }
    
    // Text may arrive in several pieces, so append rather than assign.
    - (void)parser:(NSXMLParser *)parser
     foundCharacters:(NSString *)string {
      [text appendString:string];
    }
    
    // Log whatever text was collected since the most recent opening tag.
    - (void)parser:(NSXMLParser *)parser
     didEndElement:(NSString *)elementName
      namespaceURI:(NSString *)namespaceURI
     qualifiedName:(NSString *)qName {
      NSLog(@"Text: %@", text);
    }
    

    Entity Replacement

    Custom resolver:

    /// Delegate that supplies replacement data for external entities by name.
    @interface EntityResolver : NSObject <NSXMLParserDelegate>
    /// Replacement bytes keyed by external entity name.
    @property (nonatomic, copy) NSDictionary<NSString *, NSData *> *replacements;
    @end
    
    @implementation EntityResolver
    
    /// Returns the replacement data registered for `name`, or nil when there is none.
    /// The delegate selector ends in systemID:, not systemIdentifier:.
    /// NOTE(review): this callback is presumably only sent when the parser's
    /// shouldResolveExternalEntities is YES — confirm.
    /// @param parser   The parser that met the entity.
    /// @param name     Name of the external entity.
    /// @param systemID System identifier of the entity; may be nil.
    - (NSData *)parser:(NSXMLParser *)parser
     resolveExternalEntityName:(NSString *)name
                      systemID:(NSString *)systemID {
      // Dictionary lookup compares names by value (never compare strings with ==)
      // and yields nil for unknown names.
      return self.replacements[name];
    }
    
    @end
    

    Writing XML

    NSXMLDocument:

    // NSXMLDocument and NSXMLElement are part of Foundation on macOS.
    NSXMLElement *root = [NSXMLElement elementWithName:@"root"];
    // Create the document around its root element; `doc` must exist before it is used.
    NSXMLDocument *doc = [[NSXMLDocument alloc] initWithRootElement:root];
    
    // Serialize the tree with default formatting.
    NSString *xml = [doc XMLStringWithOptions:NSXMLNodeOptionsNone];
    

    NSXMLNode:

    // Build <node>text</node> in a single call, then attach it beneath `root`.
    NSXMLNode *node = [NSXMLNode elementWithName:@"node" stringValue:@"text"];
    [root addChild:node];
    

    Concurrent Parsing

    Background queue:

    // NSXMLParser has no delegateQueue property. -parse is synchronous and calls
    // the delegate on whichever thread runs it, so run the parse itself on the
    // background queue.
    dispatch_queue_t queue = dispatch_queue_create("bgQueue", DISPATCH_QUEUE_SERIAL);
    dispatch_async(queue, ^{
      [parser parse];
    });
    

    Thread sync:

    // Take the lock associated with `self` for the duration of the braces, so
    // only one thread at a time runs code guarded by @synchronized(self).
    @synchronized(self) {
      // modify shared state
    }
    

    HTML Parsing

    Set options (NSXMLParser requires well-formed XML, so this works for XHTML but not for arbitrary HTML):

    // Turn off namespace-prefix reporting and external-entity resolution.
    [parser setShouldReportNamespacePrefixes:NO];
    [parser setShouldResolveExternalEntities:NO];
    

    Check element types:

    // Opening-tag callback that singles out elements carrying no namespace URI.
    - (void)parser:(NSXMLParser *)parser
     didStartElement:(NSString *)elementName
      namespaceURI:(NSString *)namespaceURI
     qualifiedName:(NSString *)qName
        attributes:(NSDictionary *)attributeDict {
    
      // Elements that do carry a namespace URI are not handled here.
      if (namespaceURI != nil) {
        return;
      }
    
      // HTML element
    }
    

    Advanced Usage

    Custom stream parsing:

    NSInputStream *stream = ...; // custom stream
    
    NSXMLParser *parser = [[NSXMLParser alloc] initWithStream:stream];
    
    // The parser opens the stream itself and reads it incrementally inside
    // -parse, which blocks until the stream ends or an error occurs. No manual
    // open/read loop is needed, and -parse must not be called repeatedly.
    if (![parser parse]) {
      NSLog(@"Stream parse failed: %@", parser.parserError);
    }
    
    // NOTE(review): kept from the original as a safety net; presumably harmless
    // if the parser has already closed the stream — confirm.
    [stream close];
    

    Subclassing:

    // Declares MyParser as a subclass of NSXMLParser; no members are declared yet.
    @interface MyParser : NSXMLParser
    
    // override methods
    
    @end
    

    Additional Examples

    Parsing Complex XML

    // Recursively parse through elements
    - (void)parseElement:(NSXMLElement*)element {
      // Process element
      for(NSXMLNode* child in element.children) {
        if([child isKindOfClass:[NSXMLElement class]) {
          [self parseElement:(NSXMLElement*)child];
        }
      }
    }
    
    

    Handling Errors

    // Delegate callback sent when the parser hits a fatal parse error.
    // NSXMLParserDelegate declares parser:parseErrorOccurred: (and
    // parser:validationErrorOccurred:); there is no parser:validateError:.
    - (void)parser:(NSXMLParser *)parser
    parseErrorOccurred:(NSError *)parseError
    {
      // Error codes come from the NSXMLParserError enumeration.
      if (parseError.code == NSXMLParserPrematureDocumentEndError) {
        // Handle specific error (here: the document ended unexpectedly)
      } else {
        // General error handling
      }
    }
    
    

    Concurrent Parsing

    // Work to run off the main thread.
    dispatch_block_t parseWork = ^{
      // Parse in background
    };
    
    // Follow-up that runs once all work in the group has finished.
    dispatch_block_t uiWork = ^{
      // Update UI on main thread
    };
    
    dispatch_group_t group = dispatch_group_create();
    dispatch_group_async(group, queue, parseWork);
    dispatch_group_notify(group, dispatch_get_main_queue(), uiWork);
    
    

    Custom Entity Replacement

    // Supplies replacement data for the external entity named "foo"; returns nil
    // for every other entity.
    // The delegate selector ends in systemID:, not systemIdentifier:.
    - (NSData *)parser:(NSXMLParser *)parser
     resolveExternalEntityName:(NSString *)name
                      systemID:(NSString *)systemID
    {
      // Compare string contents; `name == "foo"` compared an object pointer
      // with a C string and could never match.
      if ([name isEqualToString:@"foo"]) {
        return fooData;
      }
      return nil;
    }
    

    Tips

  • Reuse parsers for better performance
  • Parse XML incrementally when possible
  • Use caches for parsed objects instead of re-parsing
  • Avoid retain cycles with delegate pattern
  • Core Data Integration

    // Parse XML and populate Core Data
    NSEntityDescription *entity = ...;
    NSManagedObjectContext *context = ...;
    
    NSManagedObject *obj = [NSEntityDescription
      insertNewObjectForEntityForName:entity
      inManagedObjectContext:context];
    
    obj.title = [currentElement stringValueForAttribute:@"title"];
    
    [context save:&error];
    

    Debugging

  • Enable NSXMLParser logs and exceptions
  • Set delegate breakpoint to pause parsing
  • Use Xcode debugger and look at parser state
  • Troubleshooting

    Invalid XML

  • Enable strict validation
  • Handle errors in delegate methods
  • Concurrency issues

  • Synchronize shared state access
  • Ensure parser is on background thread
  • Memory leaks

  • Avoid strong reference cycles
  • Use weak references for delegates
  • Summary of Key Classes and Methods

    NSXMLParser - Main parser class

  • initWithData: - Initialize parser with XML data
  • initWithStream: - Initialize parser with input stream
  • parse - Start parsing the XML document
  • delegate - Delegate for parser callbacks
  • shouldProcessNamespaces - Enable namespace processing
  • NSXMLParserDelegate - Delegate protocol

  • parserDidStartDocument: - Parsing is starting
  • parserDidEndDocument: - Parsing ended
  • parser:didStartElement:namespaceURI:qualifiedName:attributes: - Element opened
  • parser:didEndElement:namespaceURI:qualifiedName: - Element closed
  • parser:foundCharacters: - Found text content
  • NSXMLNode - Represents XML node

  • elementsForName: - Get child elements by name (declared on NSXMLElement)
  • attributes - Get the attribute nodes as an array (declared on NSXMLElement)
  • NSXMLElement - Subclass of NSXMLNode

  • initWithName:stringValue: - Create new element
  • addChild: - Add child node
  • addAttribute: - Add an attribute node, e.g. one made with [NSXMLNode attributeWithName:stringValue:]
  • External Documentation

  • Apple Docs on NSXMLParser
  • XML Parsing Guide
  • WWDC 2012 Session Video
  • Character Encoding

    Declare the encoding in the document (NSXMLParser has no stringEncoding property):

    // The parser determines the text encoding from the document itself — its
    // byte-order mark or XML declaration, for example:
    //   <?xml version="1.0" encoding="UTF-8"?>
    NSXMLParser *parser = [[NSXMLParser alloc] initWithData:data];
    

    Memory Management Tips

  • Use autorelease pool when parsing large files
  • Avoid retain cycles with delegates
  • Reuse parsers instead of creating frequently
  • Clear delegates when no longer needed
  • Common Pitfalls

  • Forgetting to set delegate
  • Retain cycle with delegates
  • Accessing from wrong thread
  • Overflowing memory by parsing huge files
  • Not handling errors properly
  • Browse by tags:

    Browse by language:

    The easiest way to do Web Scraping

    Get HTML from any page with a simple API call. We handle proxy rotation, browser identities, automatic retries, CAPTCHAs, JavaScript rendering, etc automatically for you


    Try ProxiesAPI for free

    curl "http://api.proxiesapi.com/?key=API_KEY&url=https://example.com"

    <!doctype html>
    <html>
    <head>
        <title>Example Domain</title>
        <meta charset="utf-8" />
        <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
        <meta name="viewport" content="width=device-width, initial-scale=1" />
    ...

    X

    Don't leave just yet!

    Enter your email below to claim your free API key: