unionAll resultando en StackOverflow

He progresado con mi propia pregunta (¿Cómo cargar un marco de datos desde una secuencia de solicitudes de Python que está descargando un archivo CSV?) en StackOverflow, pero recibo un error de StackOverflow:

import requests
import numpy as np
import pandas as pd

import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

from pyspark.sql import SQLContext
sqlContext = SQLContext(sc)

chunk_size = 1024

url = "https://{0}:8443/gateway/default/webhdfs/v1/{1}?op=OPEN".format(host, filepath)

r = requests.get(url, auth=(username, password), 
                 verify=False, allow_redirects=True, 
                 stream=True)

df = None
curr_line = 1
remainder = ''
for chunk in r.iter_content(chunk_size):
    txt = remainder + chunk
    [lines, remainder] = txt.rsplit('\n', 1)

    pdf = pd.read_csv(StringIO(lines), sep='|', header=None)

    if df == None:
        df = sqlContext.createDataFrame(pdf)
    else:
        df = df.unionAll(sqlContext.createDataFrame(pdf))

print df.count()

El stacktrace está aquí:

---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-4-b3a89df3c7d8> in <module>()
     36         df = sqlContext.createDataFrame(pdf)
     37     else:
---> 38         df = df.unionAll(sqlContext.createDataFrame(pdf))
     39 
     40     #curr_line = curr_line + 1

/usr/local/src/spark160master/spark/python/pyspark/sql/dataframe.py in unionAll(self, other)
    993         This is equivalent to `UNION ALL` in SQL.
    994         """
--> 995         return DataFrame(self._jdf.unionAll(other._jdf), self.sql_ctx)
    996 
    997     @since(1.3)

/usr/local/src/spark160master/spark/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
    811         answer = self.gateway_client.send_command(command)
    812         return_value = get_return_value(
--> 813             answer, self.gateway_client, self.target_id, self.name)
    814 
    815         for temp_arg in temp_args:

/usr/local/src/spark160master/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
     43     def deco(*a, **kw):
     44         try:
---> 45             return f(*a, **kw)
     46         except py4j.protocol.Py4JJavaError as e:
     47             s = e.java_exception.toString()

/usr/local/src/spark160master/spark/python/lib/py4j-0.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    306                 raise Py4JJavaError(
    307                     "An error occurred while calling {0}{1}{2}.\n".
--> 308                     format(target_id, ".", name), value)
    309             else:
    310                 raise Py4JError(

Py4JJavaError: An error occurred while calling o19563.unionAll.
: java.lang.StackOverflowError

No estoy seguro de cómo solucionar esto. Cualquier consejo apreciado.

Respuestas a la pregunta(1)

Su respuesta a la pregunta