From ba83064608f2d6c18ec7d86e49269c1da565d9b4 Mon Sep 17 00:00:00 2001
From: Harsh J
Date: Thu, 28 Apr 2011 11:57:14 +0530
Subject: [PATCH] HUE-1. Add avro file viewer support to File Browser.

---
 apps/filebrowser/src/filebrowser/static/css/fb.css  |    4 +-
 .../src/filebrowser/templates/display.mako          |    6 +-
 apps/filebrowser/src/filebrowser/views.py           |  137 +++-
 apps/filebrowser/src/filebrowser/views_test.py      |   60 ++
 desktop/core/ext-py/avro-1.5.0/PKG-INFO             |   11 +
 desktop/core/ext-py/avro-1.5.0/setup.py             |   40 +
 .../core/ext-py/avro-1.5.0/src/avro/__init__.py     |   18 +
 .../core/ext-py/avro-1.5.0/src/avro/datafile.py     |  331 ++++++++
 desktop/core/ext-py/avro-1.5.0/src/avro/io.py       |  877 ++++++++++++++++++++
 desktop/core/ext-py/avro-1.5.0/src/avro/ipc.py      |  510 ++++++++++++
 .../core/ext-py/avro-1.5.0/src/avro/protocol.py     |  222 +++++
 desktop/core/ext-py/avro-1.5.0/src/avro/schema.py   |  707 ++++++++++++++++
 desktop/core/ext-py/avro-1.5.0/src/avro/tool.py     |  160 ++++
 desktop/core/ext-py/avro-1.5.0/src/avro/txipc.py    |  222 +++++
 .../core/ext-py/avro-1.5.0/test/test_datafile.py    |  149 ++++
 .../avro-1.5.0/test/test_datafile_interop.py        |   39 +
 desktop/core/ext-py/avro-1.5.0/test/test_io.py      |  337 ++++++++
 desktop/core/ext-py/avro-1.5.0/test/test_ipc.py     |   31 +
 .../core/ext-py/avro-1.5.0/test/test_protocol.py    |  422 ++++++++++
 desktop/core/ext-py/avro-1.5.0/test/test_schema.py  |  394 +++++++++
 20 files changed, 4641 insertions(+), 36 deletions(-)
 create mode 100644 desktop/core/ext-py/avro-1.5.0/PKG-INFO
 create mode 100644 desktop/core/ext-py/avro-1.5.0/setup.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/src/avro/__init__.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/src/avro/datafile.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/src/avro/io.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/src/avro/ipc.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/src/avro/protocol.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/src/avro/schema.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/src/avro/tool.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/src/avro/txipc.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/test/test_datafile.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/test/test_datafile_interop.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/test/test_io.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/test/test_ipc.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/test/test_protocol.py
 create mode 100644 desktop/core/ext-py/avro-1.5.0/test/test_schema.py

diff --git a/apps/filebrowser/src/filebrowser/static/css/fb.css b/apps/filebrowser/src/filebrowser/static/css/fb.css
index 13379d5..596b9a9 100644
--- a/apps/filebrowser/src/filebrowser/static/css/fb.css
+++ b/apps/filebrowser/src/filebrowser/static/css/fb.css
@@ -424,7 +424,7 @@ div.fileviewer .fv-viewLocation {
   background: url(/static/art/icons/folder_go.png) left 50%;
 }
 
-div.fileviewer .fv-viewGzip {
+div.fileviewer .fv-viewGzip, div.fileviewer .fv-viewAvro {
   background: url(/static/art/icons/page_white_zip.png) left 50%;
 }
 
@@ -541,4 +541,4 @@ div.fileeditor .fe-buttons {
 .fs-locationInput {
   width: 400px;
   margin-left: 5px;
-}
\ No newline at end of file
+}
diff --git a/apps/filebrowser/src/filebrowser/templates/display.mako b/apps/filebrowser/src/filebrowser/templates/display.mako
index 9c61078..b3b93af 100644
--- a/apps/filebrowser/src/filebrowser/templates/display.mako
+++ b/apps/filebrowser/src/filebrowser/templates/display.mako
@@ -43,6 +43,10 @@
       Preview As Gzip
     % endif
 
+    % if view['compression'] != "avro" and path.endswith('.avro'):
+      Preview As Avro
+    % endif
+
     % if view['compression'] and view['compression'] != "none":
       Stop preview
     % endif
@@ -56,7 +60,7 @@
   <div>
-    % if not view['compression'] or view['compression'] == "none":
+    % if not view['compression'] or view['compression'] in ("none", "avro"):
     <div>
       Viewing Bytes:
diff --git a/apps/filebrowser/src/filebrowser/views.py b/apps/filebrowser/src/filebrowser/views.py
index cac2ce1..84e44ca 100644
--- a/apps/filebrowser/src/filebrowser/views.py
+++ b/apps/filebrowser/src/filebrowser/views.py
@@ -36,6 +36,7 @@ from django.utils.http import http_date, urlquote
 from django.utils.html import escape
 from cStringIO import StringIO
 from gzip import GzipFile
+from avro import datafile, io
 
 from desktop.lib import i18n
 from desktop.lib.django_util import make_absolute, render_json
@@ -400,34 +401,12 @@ def display(request, path):
   if length > MAX_CHUNK_SIZE_BYTES:
     raise PopupException("Cannot request chunks greater than %d bytes" % MAX_CHUNK_SIZE_BYTES)
 
-  # Auto gzip detection, unless we are explicitly told to view binary
-  if not compression and mode != 'binary':
-    if path.endswith('.gz') and detect_gzip(request.fs.open(path).read(2)):
-      compression = 'gzip'
-      offset = 0
-    else:
-      compression = 'none'
-
-  f = request.fs.open(path)
-
-  if compression == 'gzip':
-    if offset and offset != 0:
-      raise PopupException("We don't support offset and gzip Compression")
-    try:
-      try:
-        contents = GzipFile('', 'r', 0, StringIO(f.read())).read(length)
-      except:
-        logging.warn("Could not decompress file at %s" % path, exc_info=True)
-        contents = ''
-        raise PopupException("Failed to decompress file")
-    finally:
-      f.close()
-  else:
-    try:
-      f.seek(offset)
-      contents = f.read(length)
-    finally:
-      f.close()
+  # Do not decompress in binary mode.
+  if mode == 'binary':
+    compression = 'none'
+
+  # Read out based on the detected codec.
+  compression, offset, length, contents = \
+      read_contents(compression, path, request.fs, offset, length)
 
   # Get contents as string for text mode, or at least try
   uni_contents = None
@@ -471,11 +450,103 @@ def display(request, path):
 
   return render_with_toolbars("display.mako", request, data)
 
+def read_contents(codec_type, path, fs, offset, length):
+  '''Reads the contents of the given path, decoding the data as appropriate.
+  Arguments:
+    codec_type - The type of codec to use to decode (auto-detected if None).
+    path - The path of the file to read.
+    fs - The FileSystem instance to use to read.
+    offset - Offset to seek to before the read begins.
+    length - Number of bytes to read after the offset.
+  Returns: A tuple of codec_type, offset, length and the contents read.
+  '''
+  # Auto codec detection for [gzip, avro, none];
+  # only done when codec_type is unset.
+  if not codec_type:
+    if path.endswith('.gz') and detect_gzip(fs.open(path).read(2)):
+      codec_type = 'gzip'
+      offset = 0
+    elif path.endswith('.avro') and detect_avro(fs.open(path).read(3)):
+      codec_type = 'avro'
+    else:
+      codec_type = 'none'
+
+  if codec_type == 'gzip':
+    contents = _read_gzip(fs, path, offset, length)
+  elif codec_type == 'avro':
+    contents = _read_avro(fs, path, offset, length)
+  else:
+    # For the 'none' type.
+    contents = _read_simple(fs, path, offset, length)
+
+  return (codec_type, offset, length, contents)
+
+def _read_avro(fs, path, offset, length):
+  contents = ''
+  try:
+    fhandle = fs.open(path)
+    try:
+      fhandle.seek(offset)
+      data_file_reader = datafile.DataFileReader(fhandle, io.DatumReader())
+      contents_list = []
+      read_start = fhandle.tell()
+      # Iterate over the entire sought file.
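+      # Note: fhandle.tell() reflects the position of the underlying stream,
+      # which the DataFileReader may advance a block at a time, so the
+      # read_length check below is approximate rather than byte-exact.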
+      for datum in data_file_reader:
+        read_length = fhandle.tell() - read_start
+        if read_length > length and len(contents_list) > 0:
+          break
+        else:
+          datum_str = str(datum) + "\n"
+          contents_list.append(datum_str)
+      data_file_reader.close()
+      contents = "".join(contents_list)
+    except:
+      logging.warn("Could not read avro file at %s" % path, exc_info=True)
+      raise PopupException("Failed to read Avro file")
+  finally:
+    fhandle.close()
+  return contents
+
+def _read_gzip(fs, path, offset, length):
+  contents = ''
+  if offset and offset != 0:
+    raise PopupException("We don't support offsets with gzip compression")
+  try:
+    fhandle = fs.open(path)
+    try:
+      contents = GzipFile('', 'r', 0, StringIO(fhandle.read())).read(length)
+    except:
+      logging.warn("Could not decompress file at %s" % path, exc_info=True)
+      raise PopupException("Failed to decompress file")
+  finally:
+    fhandle.close()
+  return contents
+
+def _read_simple(fs, path, offset, length):
+  contents = ''
+  try:
+    fhandle = fs.open(path)
+    try:
+      fhandle.seek(offset)
+      contents = fhandle.read(length)
+    except:
+      logging.warn("Could not read file at %s" % path, exc_info=True)
+      raise PopupException("Failed to read file")
+  finally:
+    fhandle.close()
+  return contents
+
 def detect_gzip(contents):
-  ''' This is a silly small function which checks to see if the file is Gzip'''
-  if contents[:2] == '\x1f\x8b':
-    return True
-  return False
+  '''This is a silly small function which checks to see if the file is Gzip'''
+  return contents[:2] == '\x1f\x8b'
+
+def detect_avro(contents):
+  '''This is a silly small function which checks to see if the file is Avro'''
+  # Check if the first three bytes are 'O', 'b' and 'j'.
+  return contents[:3] == '\x4F\x62\x6A'
 
 def _calculate_navigation(offset, length, size):
   """
@@ -730,4 +801,4 @@ def truncate(toTruncate, charsToKeep=50):
     truncated = toTruncate[:charsToKeep] + "..."
     return truncated
   else:
-    return toTruncate
\ No newline at end of file
+    return toTruncate
diff --git a/apps/filebrowser/src/filebrowser/views_test.py b/apps/filebrowser/src/filebrowser/views_test.py
index 1a28ef3..fabc2f9 100644
--- a/apps/filebrowser/src/filebrowser/views_test.py
+++ b/apps/filebrowser/src/filebrowser/views_test.py
@@ -20,6 +20,7 @@ Tests for filebrowser views
 """
 from nose.plugins.attrib import attr
 from hadoop import mini_cluster
+from avro import schema, datafile, io
 from desktop.lib.django_test_util import make_logged_in_client
 from nose.tools import assert_true, assert_false, assert_equal
 import logging
@@ -105,6 +106,65 @@ def test_listdir():
       pass      # Don't let cleanup errors mask earlier failures
     cluster.shutdown()
 
+@attr('requires_hadoop')
+def test_view_avro():
+  cluster = mini_cluster.shared_cluster(conf=True)
+  try:
+    c = make_logged_in_client()
+    cluster.fs.setuser(cluster.superuser)
+    if cluster.fs.isdir("/test-avro-filebrowser"):
+      cluster.fs.rmtree('/test-avro-filebrowser/')
+
+    cluster.fs.mkdir('/test-avro-filebrowser/')
+
+    test_schema = schema.parse("""
+      {
+        "name": "test",
+        "type": "record",
+        "fields": [
+          { "name": "name", "type": "string" },
+          { "name": "integer", "type": "int" }
+        ]
+      }
+    """)
+
+    f = cluster.fs.open('/test-avro-filebrowser/test-view.avro', "w")
+    data_file_writer = datafile.DataFileWriter(f, io.DatumWriter(),
+                                               writers_schema=test_schema,
+                                               codec='deflate')
+    dummy_datum = {
+      'name': 'Test',
+      'integer': 10,
+    }
+    data_file_writer.append(dummy_datum)
+    data_file_writer.close()
+
+    # autodetect
+    response = c.get('/filebrowser/view/test-avro-filebrowser/test-view.avro')
+    assert_equal(response.context['view']['contents'],
+                 "{u'integer': 10, u'name': u'Test'}\n")
+
+    # offsetting should work as well
+    response = c.get('/filebrowser/view/test-avro-filebrowser/test-view.avro?offset=1')
+    assert_true(response.context.has_key('view'))
+
+    f = cluster.fs.open('/test-avro-filebrowser/test-view2.avro', "w")
+    f.write("hello")
+    f.close()
+
+    # we shouldn't autodetect non-avro files
+    response = c.get('/filebrowser/view/test-avro-filebrowser/test-view2.avro')
+    assert_equal(response.context['view']['contents'], "hello")
+
+    # explicitly specifying a compression that doesn't match the data should fail cleanly
+    response = c.get('/filebrowser/view/test-avro-filebrowser/test-view2.avro?compression=gzip')
+    assert_false(response.context.has_key('view'))
+
+  finally:
+    try:
+      cluster.fs.rmtree('/test-avro-filebrowser/')
+    except:
+      pass      # Don't let cleanup errors mask earlier failures
+    cluster.shutdown()
 
 @attr('requires_hadoop')
 def test_view_gz():
diff --git a/desktop/core/ext-py/avro-1.5.0/PKG-INFO b/desktop/core/ext-py/avro-1.5.0/PKG-INFO
new file mode 100644
index 0000000..6274d06
--- /dev/null
+++ b/desktop/core/ext-py/avro-1.5.0/PKG-INFO
@@ -0,0 +1,11 @@
+Metadata-Version: 1.0
+Name: avro
+Version: 1.5.0
+Summary: Avro is a serialization and RPC framework.
+Home-page: http://hadoop.apache.org/avro
+Author: Apache Avro
+Author-email: avro-dev@hadoop.apache.org
+License: Apache License 2.0
+Description: UNKNOWN
+Keywords: avro serialization rpc
+Platform: UNKNOWN
diff --git a/desktop/core/ext-py/avro-1.5.0/setup.py b/desktop/core/ext-py/avro-1.5.0/setup.py
new file mode 100644
index 0000000..27e5eaa
--- /dev/null
+++ b/desktop/core/ext-py/avro-1.5.0/setup.py
@@ -0,0 +1,40 @@
+#! /usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + +setup( + name = 'avro', + version = '1.5.0', + packages = ['avro',], + package_dir = {'avro': 'src/avro'}, + + # Project uses simplejson, so ensure that it gets installed or upgraded + # on the target machine + install_requires = ['simplejson >= 2.0.9'], + + # metadata for upload to PyPI + author = 'Apache Avro', + author_email = 'avro-dev@hadoop.apache.org', + description = 'Avro is a serialization and RPC framework.', + license = 'Apache License 2.0', + keywords = 'avro serialization rpc', + url = 'http://hadoop.apache.org/avro', +) diff --git a/desktop/core/ext-py/avro-1.5.0/src/avro/__init__.py b/desktop/core/ext-py/avro-1.5.0/src/avro/__init__.py new file mode 100644 index 0000000..da51d9b --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/src/avro/__init__.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ['schema', 'io', 'datafile', 'protocol', 'ipc'] + diff --git a/desktop/core/ext-py/avro-1.5.0/src/avro/datafile.py b/desktop/core/ext-py/avro-1.5.0/src/avro/datafile.py new file mode 100644 index 0000000..f81c7e2 --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/src/avro/datafile.py @@ -0,0 +1,331 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Read/Write Avro File Object Containers. 
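
A minimal write/read sketch (the file name and record contents here are
illustrative only, not part of this module):

  from avro import datafile, io, schema

  SCHEMA = schema.parse(
      '{"type": "record", "name": "Rec",'
      ' "fields": [{"name": "x", "type": "int"}]}')

  # Write one record, then read it back.
  writer = datafile.DataFileWriter(open('rec.avro', 'wb'), io.DatumWriter(),
                                   writers_schema=SCHEMA)
  writer.append({'x': 1})
  writer.close()

  reader = datafile.DataFileReader(open('rec.avro', 'rb'), io.DatumReader())
  for record in reader:
    print record           # prints {u'x': 1}
  reader.close()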
+""" +import zlib +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO +from avro import schema +from avro import io + +# +# Constants +# + +VERSION = 1 +MAGIC = 'Obj' + chr(VERSION) +MAGIC_SIZE = len(MAGIC) +SYNC_SIZE = 16 +SYNC_INTERVAL = 1000 * SYNC_SIZE # TODO(hammer): make configurable +META_SCHEMA = schema.parse("""\ +{"type": "record", "name": "org.apache.avro.file.Header", + "fields" : [ + {"name": "magic", "type": {"type": "fixed", "name": "magic", "size": %d}}, + {"name": "meta", "type": {"type": "map", "values": "bytes"}}, + {"name": "sync", "type": {"type": "fixed", "name": "sync", "size": %d}}]} +""" % (MAGIC_SIZE, SYNC_SIZE)) +VALID_CODECS = ['null', 'deflate'] +VALID_ENCODINGS = ['binary'] # not used yet + +CODEC_KEY = "avro.codec" +SCHEMA_KEY = "avro.schema" + +# +# Exceptions +# + +class DataFileException(schema.AvroException): + """ + Raised when there's a problem reading or writing file object containers. + """ + def __init__(self, fail_msg): + schema.AvroException.__init__(self, fail_msg) + +# +# Write Path +# + +class DataFileWriter(object): + @staticmethod + def generate_sync_marker(): + return generate_sixteen_random_bytes() + + # TODO(hammer): make 'encoder' a metadata property + def __init__(self, writer, datum_writer, writers_schema=None, codec='null'): + """ + If the schema is not present, presume we're appending. + + @param writer: File-like object to write into. + """ + self._writer = writer + self._encoder = io.BinaryEncoder(writer) + self._datum_writer = datum_writer + self._buffer_writer = StringIO() + self._buffer_encoder = io.BinaryEncoder(self._buffer_writer) + self._block_count = 0 + self._meta = {} + + if writers_schema is not None: + if codec not in VALID_CODECS: + raise DataFileException("Unknown codec: %r" % codec) + self._sync_marker = DataFileWriter.generate_sync_marker() + self.set_meta('avro.codec', codec) + self.set_meta('avro.schema', str(writers_schema)) + self.datum_writer.writers_schema = writers_schema + self._write_header() + else: + # open writer for reading to collect metadata + dfr = DataFileReader(writer, io.DatumReader()) + + # TODO(hammer): collect arbitrary metadata + # collect metadata + self._sync_marker = dfr.sync_marker + self.set_meta('avro.codec', dfr.get_meta('avro.codec')) + + # get schema used to write existing file + schema_from_file = dfr.get_meta('avro.schema') + self.set_meta('avro.schema', schema_from_file) + self.datum_writer.writers_schema = schema.parse(schema_from_file) + + # seek to the end of the file and prepare for writing + writer.seek(0, 2) + + # read-only properties + writer = property(lambda self: self._writer) + encoder = property(lambda self: self._encoder) + datum_writer = property(lambda self: self._datum_writer) + buffer_writer = property(lambda self: self._buffer_writer) + buffer_encoder = property(lambda self: self._buffer_encoder) + sync_marker = property(lambda self: self._sync_marker) + meta = property(lambda self: self._meta) + + # read/write properties + def set_block_count(self, new_val): + self._block_count = new_val + block_count = property(lambda self: self._block_count, set_block_count) + + # utility functions to read/write metadata entries + def get_meta(self, key): + return self._meta.get(key) + def set_meta(self, key, val): + self._meta[key] = val + + def _write_header(self): + header = {'magic': MAGIC, + 'meta': self.meta, + 'sync': self.sync_marker} + self.datum_writer.write_data(META_SCHEMA, header, self.encoder) + + # TODO(hammer): make a schema 
for blocks and use datum_writer + def _write_block(self): + if self.block_count > 0: + # write number of items in block + self.encoder.write_long(self.block_count) + + # write block contents + uncompressed_data = self.buffer_writer.getvalue() + if self.get_meta(CODEC_KEY) == 'null': + compressed_data = uncompressed_data + elif self.get_meta(CODEC_KEY) == 'deflate': + # The first two characters and last character are zlib + # wrappers around deflate data. + compressed_data = zlib.compress(uncompressed_data)[2:-1] + else: + fail_msg = '"%s" codec is not supported.' % self.get_meta(CODEC_KEY) + raise DataFileException(fail_msg) + + # Write length of block + self.encoder.write_long(len(compressed_data)) + + # Write block + self.writer.write(compressed_data) + + # write sync marker + self.writer.write(self.sync_marker) + + # reset buffer + self.buffer_writer.truncate(0) + self.block_count = 0 + + def append(self, datum): + """Append a datum to the file.""" + self.datum_writer.write(datum, self.buffer_encoder) + self.block_count += 1 + + # if the data to write is larger than the sync interval, write the block + if self.buffer_writer.tell() >= SYNC_INTERVAL: + self._write_block() + + def sync(self): + """ + Return the current position as a value that may be passed to + DataFileReader.seek(long). Forces the end of the current block, + emitting a synchronization marker. + """ + self._write_block() + return self.writer.tell() + + def flush(self): + """Flush the current state of the file, including metadata.""" + self._write_block() + self.writer.flush() + + def close(self): + """Close the file.""" + self.flush() + self.writer.close() + +class DataFileReader(object): + """Read files written by DataFileWriter.""" + # TODO(hammer): allow user to specify expected schema? + # TODO(hammer): allow user to specify the encoder + def __init__(self, reader, datum_reader): + self._reader = reader + self._raw_decoder = io.BinaryDecoder(reader) + self._datum_decoder = None # Maybe reset at every block. + self._datum_reader = datum_reader + + # read the header: magic, meta, sync + self._read_header() + + # ensure codec is valid + self.codec = self.get_meta('avro.codec') + if self.codec is None: + self.codec = "null" + if self.codec not in VALID_CODECS: + raise DataFileException('Unknown codec: %s.' % self.codec) + + # get file length + self._file_length = self.determine_file_length() + + # get ready to read + self._block_count = 0 + self.datum_reader.writers_schema = schema.parse(self.get_meta(SCHEMA_KEY)) + + def __iter__(self): + return self + + # read-only properties + reader = property(lambda self: self._reader) + raw_decoder = property(lambda self: self._raw_decoder) + datum_decoder = property(lambda self: self._datum_decoder) + datum_reader = property(lambda self: self._datum_reader) + sync_marker = property(lambda self: self._sync_marker) + meta = property(lambda self: self._meta) + file_length = property(lambda self: self._file_length) + + # read/write properties + def set_block_count(self, new_val): + self._block_count = new_val + block_count = property(lambda self: self._block_count, set_block_count) + + # utility functions to read/write metadata entries + def get_meta(self, key): + return self._meta.get(key) + def set_meta(self, key, val): + self._meta[key] = val + + def determine_file_length(self): + """ + Get file length and leave file cursor where we found it. 
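    In effect: remember tell(), seek(0, 2) to jump to EOF, take tell() as
    the length, then seek back to the remembered position.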
+ """ + remember_pos = self.reader.tell() + self.reader.seek(0, 2) + file_length = self.reader.tell() + self.reader.seek(remember_pos) + return file_length + + def is_EOF(self): + return self.reader.tell() == self.file_length + + def _read_header(self): + # seek to the beginning of the file to get magic block + self.reader.seek(0, 0) + + # read header into a dict + header = self.datum_reader.read_data( + META_SCHEMA, META_SCHEMA, self.raw_decoder) + + # check magic number + if header.get('magic') != MAGIC: + fail_msg = "Not an Avro data file: %s doesn't match %s."\ + % (header.get('magic'), MAGIC) + raise schema.AvroException(fail_msg) + + # set metadata + self._meta = header['meta'] + + # set sync marker + self._sync_marker = header['sync'] + + def _read_block_header(self): + self.block_count = self.raw_decoder.read_long() + if self.codec == "null": + # Skip a long; we don't need to use the length. + self.raw_decoder.skip_long() + self._datum_decoder = self._raw_decoder + else: + # Compressed data is stored as (length, data), which + # corresponds to how the "bytes" type is encoded. + data = self.raw_decoder.read_bytes() + # -15 is the log of the window size; negative indicates + # "raw" (no zlib headers) decompression. See zlib.h. + uncompressed = zlib.decompress(data, -15) + self._datum_decoder = io.BinaryDecoder(StringIO(uncompressed)) + + def _skip_sync(self): + """ + Read the length of the sync marker; if it matches the sync marker, + return True. Otherwise, seek back to where we started and return False. + """ + proposed_sync_marker = self.reader.read(SYNC_SIZE) + if proposed_sync_marker != self.sync_marker: + self.reader.seek(-SYNC_SIZE, 1) + return False + else: + return True + + # TODO(hammer): handle block of length zero + # TODO(hammer): clean this up with recursion + def next(self): + """Return the next datum in the file.""" + if self.block_count == 0: + if self.is_EOF(): + raise StopIteration + elif self._skip_sync(): + if self.is_EOF(): raise StopIteration + self._read_block_header() + else: + self._read_block_header() + + datum = self.datum_reader.read(self.datum_decoder) + self.block_count -= 1 + return datum + + def close(self): + """Close this reader.""" + self.reader.close() + +def generate_sixteen_random_bytes(): + try: + import os + return os.urandom(16) + except: + import random + return [ chr(random.randrange(256)) for i in range(16) ] diff --git a/desktop/core/ext-py/avro-1.5.0/src/avro/io.py b/desktop/core/ext-py/avro-1.5.0/src/avro/io.py new file mode 100644 index 0000000..b7f0f86 --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/src/avro/io.py @@ -0,0 +1,877 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +Input/Output utilities, including: + + * i/o-specific constants + * i/o-specific exceptions + * schema validation + * leaf value encoding and decoding + * datum reader/writer stuff (?) + +Also includes a generic representation for data, which +uses the following mapping: + + * Schema records are implemented as dict. + * Schema arrays are implemented as list. + * Schema maps are implemented as dict. + * Schema strings are implemented as unicode. + * Schema bytes are implemented as str. + * Schema ints are implemented as int. + * Schema longs are implemented as long. + * Schema floats are implemented as float. + * Schema doubles are implemented as float. + * Schema booleans are implemented as bool. +""" +import struct +from avro import schema +import sys + +try: + import json +except ImportError: + import simplejson as json + +# +# Constants +# + +INT_MIN_VALUE = -(1 << 31) +INT_MAX_VALUE = (1 << 31) - 1 +LONG_MIN_VALUE = -(1 << 63) +LONG_MAX_VALUE = (1 << 63) - 1 + +# TODO(hammer): shouldn't ! be < for little-endian (according to spec?) +if sys.version_info >= (2, 5, 0): + struct_class = struct.Struct +else: + class SimpleStruct(object): + def __init__(self, format): + self.format = format + def pack(self, *args): + return struct.pack(self.format, *args) + def unpack(self, *args): + return struct.unpack(self.format, *args) + struct_class = SimpleStruct + +STRUCT_INT = struct_class('!I') # big-endian unsigned int +STRUCT_LONG = struct_class('!Q') # big-endian unsigned long long +STRUCT_FLOAT = struct_class('!f') # big-endian float +STRUCT_DOUBLE = struct_class('!d') # big-endian double + +# +# Exceptions +# + +class AvroTypeException(schema.AvroException): + """Raised when datum is not an example of schema.""" + def __init__(self, expected_schema, datum): + pretty_expected = json.dumps(json.loads(str(expected_schema)), indent=2) + fail_msg = "The datum %s is not an example of the schema %s"\ + % (datum, pretty_expected) + schema.AvroException.__init__(self, fail_msg) + +class SchemaResolutionException(schema.AvroException): + def __init__(self, fail_msg, writers_schema=None, readers_schema=None): + pretty_writers = json.dumps(json.loads(str(writers_schema)), indent=2) + pretty_readers = json.dumps(json.loads(str(readers_schema)), indent=2) + if writers_schema: fail_msg += "\nWriter's Schema: %s" % pretty_writers + if readers_schema: fail_msg += "\nReader's Schema: %s" % pretty_readers + schema.AvroException.__init__(self, fail_msg) + +# +# Validate +# + +def validate(expected_schema, datum): + """Determine if a python datum is an instance of a schema.""" + schema_type = expected_schema.type + if schema_type == 'null': + return datum is None + elif schema_type == 'boolean': + return isinstance(datum, bool) + elif schema_type == 'string': + return isinstance(datum, basestring) + elif schema_type == 'bytes': + return isinstance(datum, str) + elif schema_type == 'int': + return ((isinstance(datum, int) or isinstance(datum, long)) + and INT_MIN_VALUE <= datum <= INT_MAX_VALUE) + elif schema_type == 'long': + return ((isinstance(datum, int) or isinstance(datum, long)) + and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE) + elif schema_type in ['float', 'double']: + return (isinstance(datum, int) or isinstance(datum, long) + or isinstance(datum, float)) + elif schema_type == 'fixed': + return isinstance(datum, str) and len(datum) == expected_schema.size + elif schema_type == 'enum': + return datum in expected_schema.symbols + elif schema_type == 'array': + return (isinstance(datum, list) and + 
False not in [validate(expected_schema.items, d) for d in datum]) + elif schema_type == 'map': + return (isinstance(datum, dict) and + False not in [isinstance(k, basestring) for k in datum.keys()] and + False not in + [validate(expected_schema.values, v) for v in datum.values()]) + elif schema_type in ['union', 'error_union']: + return True in [validate(s, datum) for s in expected_schema.schemas] + elif schema_type in ['record', 'error', 'request']: + return (isinstance(datum, dict) and + False not in + [validate(f.type, datum.get(f.name)) for f in expected_schema.fields]) + +# +# Decoder/Encoder +# + +class BinaryDecoder(object): + """Read leaf values.""" + def __init__(self, reader): + """ + reader is a Python object on which we can call read, seek, and tell. + """ + self._reader = reader + + # read-only properties + reader = property(lambda self: self._reader) + + def read(self, n): + """ + Read n bytes. + """ + return self.reader.read(n) + + def read_null(self): + """ + null is written as zero bytes + """ + return None + + def read_boolean(self): + """ + a boolean is written as a single byte + whose value is either 0 (false) or 1 (true). + """ + return ord(self.read(1)) == 1 + + def read_int(self): + """ + int and long values are written using variable-length, zig-zag coding. + """ + return self.read_long() + + def read_long(self): + """ + int and long values are written using variable-length, zig-zag coding. + """ + b = ord(self.read(1)) + n = b & 0x7F + shift = 7 + while (b & 0x80) != 0: + b = ord(self.read(1)) + n |= (b & 0x7F) << shift + shift += 7 + datum = (n >> 1) ^ -(n & 1) + return datum + + def read_float(self): + """ + A float is written as 4 bytes. + The float is converted into a 32-bit integer using a method equivalent to + Java's floatToIntBits and then encoded in little-endian format. + """ + bits = (((ord(self.read(1)) & 0xffL)) | + ((ord(self.read(1)) & 0xffL) << 8) | + ((ord(self.read(1)) & 0xffL) << 16) | + ((ord(self.read(1)) & 0xffL) << 24)) + return STRUCT_FLOAT.unpack(STRUCT_INT.pack(bits))[0] + + def read_double(self): + """ + A double is written as 8 bytes. + The double is converted into a 64-bit integer using a method equivalent to + Java's doubleToLongBits and then encoded in little-endian format. + """ + bits = (((ord(self.read(1)) & 0xffL)) | + ((ord(self.read(1)) & 0xffL) << 8) | + ((ord(self.read(1)) & 0xffL) << 16) | + ((ord(self.read(1)) & 0xffL) << 24) | + ((ord(self.read(1)) & 0xffL) << 32) | + ((ord(self.read(1)) & 0xffL) << 40) | + ((ord(self.read(1)) & 0xffL) << 48) | + ((ord(self.read(1)) & 0xffL) << 56)) + return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0] + + def read_bytes(self): + """ + Bytes are encoded as a long followed by that many bytes of data. + """ + return self.read(self.read_long()) + + def read_utf8(self): + """ + A string is encoded as a long followed by + that many bytes of UTF-8 encoded character data. 
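    For example, the bytes 06 61 62 63 decode to u'abc': the zig-zag
    varint 06 gives a length of 3, followed by three bytes of UTF-8 data.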
+ """ + return unicode(self.read_bytes(), "utf-8") + + def skip_null(self): + pass + + def skip_boolean(self): + self.skip(1) + + def skip_int(self): + self.skip_long() + + def skip_long(self): + b = ord(self.read(1)) + while (b & 0x80) != 0: + b = ord(self.read(1)) + + def skip_float(self): + self.skip(4) + + def skip_double(self): + self.skip(8) + + def skip_bytes(self): + self.skip(self.read_long()) + + def skip_utf8(self): + self.skip_bytes() + + def skip(self, n): + self.reader.seek(self.reader.tell() + n) + +class BinaryEncoder(object): + """Write leaf values.""" + def __init__(self, writer): + """ + writer is a Python object on which we can call write. + """ + self._writer = writer + + # read-only properties + writer = property(lambda self: self._writer) + + def write(self, datum): + """Write an abritrary datum.""" + self.writer.write(datum) + + def write_null(self, datum): + """ + null is written as zero bytes + """ + pass + + def write_boolean(self, datum): + """ + a boolean is written as a single byte + whose value is either 0 (false) or 1 (true). + """ + if datum: + self.write(chr(1)) + else: + self.write(chr(0)) + + def write_int(self, datum): + """ + int and long values are written using variable-length, zig-zag coding. + """ + self.write_long(datum); + + def write_long(self, datum): + """ + int and long values are written using variable-length, zig-zag coding. + """ + datum = (datum << 1) ^ (datum >> 63) + while (datum & ~0x7F) != 0: + self.write(chr((datum & 0x7f) | 0x80)) + datum >>= 7 + self.write(chr(datum)) + + def write_float(self, datum): + """ + A float is written as 4 bytes. + The float is converted into a 32-bit integer using a method equivalent to + Java's floatToIntBits and then encoded in little-endian format. + """ + bits = STRUCT_INT.unpack(STRUCT_FLOAT.pack(datum))[0] + self.write(chr((bits) & 0xFF)) + self.write(chr((bits >> 8) & 0xFF)) + self.write(chr((bits >> 16) & 0xFF)) + self.write(chr((bits >> 24) & 0xFF)) + + def write_double(self, datum): + """ + A double is written as 8 bytes. + The double is converted into a 64-bit integer using a method equivalent to + Java's doubleToLongBits and then encoded in little-endian format. + """ + bits = STRUCT_LONG.unpack(STRUCT_DOUBLE.pack(datum))[0] + self.write(chr((bits) & 0xFF)) + self.write(chr((bits >> 8) & 0xFF)) + self.write(chr((bits >> 16) & 0xFF)) + self.write(chr((bits >> 24) & 0xFF)) + self.write(chr((bits >> 32) & 0xFF)) + self.write(chr((bits >> 40) & 0xFF)) + self.write(chr((bits >> 48) & 0xFF)) + self.write(chr((bits >> 56) & 0xFF)) + + def write_bytes(self, datum): + """ + Bytes are encoded as a long followed by that many bytes of data. + """ + self.write_long(len(datum)) + self.write(struct.pack('%ds' % len(datum), datum)) + + def write_utf8(self, datum): + """ + A string is encoded as a long followed by + that many bytes of UTF-8 encoded character data. 
+ """ + datum = datum.encode("utf-8") + self.write_bytes(datum) + +# +# DatumReader/Writer +# + +class DatumReader(object): + """Deserialize Avro-encoded data into a Python data structure.""" + @staticmethod + def check_props(schema_one, schema_two, prop_list): + for prop in prop_list: + if getattr(schema_one, prop) != getattr(schema_two, prop): + return False + return True + + @staticmethod + def match_schemas(writers_schema, readers_schema): + w_type = writers_schema.type + r_type = readers_schema.type + if 'union' in [w_type, r_type] or 'error_union' in [w_type, r_type]: + return True + elif (w_type in schema.PRIMITIVE_TYPES and r_type in schema.PRIMITIVE_TYPES + and w_type == r_type): + return True + elif (w_type == r_type == 'record' and + DatumReader.check_props(writers_schema, readers_schema, + ['fullname'])): + return True + elif (w_type == r_type == 'error' and + DatumReader.check_props(writers_schema, readers_schema, + ['fullname'])): + return True + elif (w_type == r_type == 'request'): + return True + elif (w_type == r_type == 'fixed' and + DatumReader.check_props(writers_schema, readers_schema, + ['fullname', 'size'])): + return True + elif (w_type == r_type == 'enum' and + DatumReader.check_props(writers_schema, readers_schema, + ['fullname'])): + return True + elif (w_type == r_type == 'map' and + DatumReader.check_props(writers_schema.values, + readers_schema.values, ['type'])): + return True + elif (w_type == r_type == 'array' and + DatumReader.check_props(writers_schema.items, + readers_schema.items, ['type'])): + return True + + # Handle schema promotion + if w_type == 'int' and r_type in ['long', 'float', 'double']: + return True + elif w_type == 'long' and r_type in ['float', 'double']: + return True + elif w_type == 'float' and r_type == 'double': + return True + return False + + def __init__(self, writers_schema=None, readers_schema=None): + """ + As defined in the Avro specification, we call the schema encoded + in the data the "writer's schema", and the schema expected by the + reader the "reader's schema". + """ + self._writers_schema = writers_schema + self._readers_schema = readers_schema + + # read/write properties + def set_writers_schema(self, writers_schema): + self._writers_schema = writers_schema + writers_schema = property(lambda self: self._writers_schema, + set_writers_schema) + def set_readers_schema(self, readers_schema): + self._readers_schema = readers_schema + readers_schema = property(lambda self: self._readers_schema, + set_readers_schema) + + def read(self, decoder): + if self.readers_schema is None: + self.readers_schema = self.writers_schema + return self.read_data(self.writers_schema, self.readers_schema, decoder) + + def read_data(self, writers_schema, readers_schema, decoder): + # schema matching + if not DatumReader.match_schemas(writers_schema, readers_schema): + fail_msg = 'Schemas do not match.' + raise SchemaResolutionException(fail_msg, writers_schema, readers_schema) + + # schema resolution: reader's schema is a union, writer's schema is not + if (writers_schema.type not in ['union', 'error_union'] + and readers_schema.type in ['union', 'error_union']): + for s in readers_schema.schemas: + if DatumReader.match_schemas(writers_schema, s): + return self.read_data(writers_schema, s, decoder) + fail_msg = 'Schemas do not match.' 
+ raise SchemaResolutionException(fail_msg, writers_schema, readers_schema) + + # function dispatch for reading data based on type of writer's schema + if writers_schema.type == 'null': + return decoder.read_null() + elif writers_schema.type == 'boolean': + return decoder.read_boolean() + elif writers_schema.type == 'string': + return decoder.read_utf8() + elif writers_schema.type == 'int': + return decoder.read_int() + elif writers_schema.type == 'long': + return decoder.read_long() + elif writers_schema.type == 'float': + return decoder.read_float() + elif writers_schema.type == 'double': + return decoder.read_double() + elif writers_schema.type == 'bytes': + return decoder.read_bytes() + elif writers_schema.type == 'fixed': + return self.read_fixed(writers_schema, readers_schema, decoder) + elif writers_schema.type == 'enum': + return self.read_enum(writers_schema, readers_schema, decoder) + elif writers_schema.type == 'array': + return self.read_array(writers_schema, readers_schema, decoder) + elif writers_schema.type == 'map': + return self.read_map(writers_schema, readers_schema, decoder) + elif writers_schema.type in ['union', 'error_union']: + return self.read_union(writers_schema, readers_schema, decoder) + elif writers_schema.type in ['record', 'error', 'request']: + return self.read_record(writers_schema, readers_schema, decoder) + else: + fail_msg = "Cannot read unknown schema type: %s" % writers_schema.type + raise schema.AvroException(fail_msg) + + def skip_data(self, writers_schema, decoder): + if writers_schema.type == 'null': + return decoder.skip_null() + elif writers_schema.type == 'boolean': + return decoder.skip_boolean() + elif writers_schema.type == 'string': + return decoder.skip_utf8() + elif writers_schema.type == 'int': + return decoder.skip_int() + elif writers_schema.type == 'long': + return decoder.skip_long() + elif writers_schema.type == 'float': + return decoder.skip_float() + elif writers_schema.type == 'double': + return decoder.skip_double() + elif writers_schema.type == 'bytes': + return decoder.skip_bytes() + elif writers_schema.type == 'fixed': + return self.skip_fixed(writers_schema, decoder) + elif writers_schema.type == 'enum': + return self.skip_enum(writers_schema, decoder) + elif writers_schema.type == 'array': + return self.skip_array(writers_schema, decoder) + elif writers_schema.type == 'map': + return self.skip_map(writers_schema, decoder) + elif writers_schema.type in ['union', 'error_union']: + return self.skip_union(writers_schema, decoder) + elif writers_schema.type in ['record', 'error', 'request']: + return self.skip_record(writers_schema, decoder) + else: + fail_msg = "Unknown schema type: %s" % writers_schema.type + raise schema.AvroException(fail_msg) + + def read_fixed(self, writers_schema, readers_schema, decoder): + """ + Fixed instances are encoded using the number of bytes declared + in the schema. + """ + return decoder.read(writers_schema.size) + + def skip_fixed(self, writers_schema, decoder): + return decoder.skip(writers_schema.size) + + def read_enum(self, writers_schema, readers_schema, decoder): + """ + An enum is encoded by a int, representing the zero-based position + of the symbol in the schema. 
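    For example, for an enum with symbols ["HEARTS", "SPADES"] (illustrative),
    the stored int 1 (zig-zag byte 02) decodes to "SPADES".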
+ """ + # read data + index_of_symbol = decoder.read_int() + if index_of_symbol >= len(writers_schema.symbols): + fail_msg = "Can't access enum index %d for enum with %d symbols"\ + % (index_of_symbol, len(writers_schema.symbols)) + raise SchemaResolutionException(fail_msg, writers_schema, readers_schema) + read_symbol = writers_schema.symbols[index_of_symbol] + + # schema resolution + if read_symbol not in readers_schema.symbols: + fail_msg = "Symbol %s not present in Reader's Schema" % read_symbol + raise SchemaResolutionException(fail_msg, writers_schema, readers_schema) + + return read_symbol + + def skip_enum(self, writers_schema, decoder): + return decoder.skip_int() + + def read_array(self, writers_schema, readers_schema, decoder): + """ + Arrays are encoded as a series of blocks. + + Each block consists of a long count value, + followed by that many array items. + A block with count zero indicates the end of the array. + Each item is encoded per the array's item schema. + + If a block's count is negative, + then the count is followed immediately by a long block size, + indicating the number of bytes in the block. + The actual count in this case + is the absolute value of the count written. + """ + read_items = [] + block_count = decoder.read_long() + while block_count != 0: + if block_count < 0: + block_count = -block_count + block_size = decoder.read_long() + for i in range(block_count): + read_items.append(self.read_data(writers_schema.items, + readers_schema.items, decoder)) + block_count = decoder.read_long() + return read_items + + def skip_array(self, writers_schema, decoder): + block_count = decoder.read_long() + while block_count != 0: + if block_count < 0: + block_size = decoder.read_long() + decoder.skip(block_size) + else: + for i in range(block_count): + self.skip_data(writers_schema.items, decoder) + block_count = decoder.read_long() + + def read_map(self, writers_schema, readers_schema, decoder): + """ + Maps are encoded as a series of blocks. + + Each block consists of a long count value, + followed by that many key/value pairs. + A block with count zero indicates the end of the map. + Each item is encoded per the map's value schema. + + If a block's count is negative, + then the count is followed immediately by a long block size, + indicating the number of bytes in the block. + The actual count in this case + is the absolute value of the count written. + """ + read_items = {} + block_count = decoder.read_long() + while block_count != 0: + if block_count < 0: + block_count = -block_count + block_size = decoder.read_long() + for i in range(block_count): + key = decoder.read_utf8() + read_items[key] = self.read_data(writers_schema.values, + readers_schema.values, decoder) + block_count = decoder.read_long() + return read_items + + def skip_map(self, writers_schema, decoder): + block_count = decoder.read_long() + while block_count != 0: + if block_count < 0: + block_size = decoder.read_long() + decoder.skip(block_size) + else: + for i in range(block_count): + decoder.skip_utf8() + self.skip_data(writers_schema.values, decoder) + block_count = decoder.read_long() + + def read_union(self, writers_schema, readers_schema, decoder): + """ + A union is encoded by first writing a long value indicating + the zero-based position within the union of the schema of its value. + The value is then encoded per the indicated schema within the union. 
+ """ + # schema resolution + index_of_schema = int(decoder.read_long()) + if index_of_schema >= len(writers_schema.schemas): + fail_msg = "Can't access branch index %d for union with %d branches"\ + % (index_of_schema, len(writers_schema.schemas)) + raise SchemaResolutionException(fail_msg, writers_schema, readers_schema) + selected_writers_schema = writers_schema.schemas[index_of_schema] + + # read data + return self.read_data(selected_writers_schema, readers_schema, decoder) + + def skip_union(self, writers_schema, decoder): + index_of_schema = int(decoder.read_long()) + if index_of_schema >= len(writers_schema.schemas): + fail_msg = "Can't access branch index %d for union with %d branches"\ + % (index_of_schema, len(writers_schema.schemas)) + raise SchemaResolutionException(fail_msg, writers_schema) + return self.skip_data(writers_schema.schemas[index_of_schema], decoder) + + def read_record(self, writers_schema, readers_schema, decoder): + """ + A record is encoded by encoding the values of its fields + in the order that they are declared. In other words, a record + is encoded as just the concatenation of the encodings of its fields. + Field values are encoded per their schema. + + Schema Resolution: + * the ordering of fields may be different: fields are matched by name. + * schemas for fields with the same name in both records are resolved + recursively. + * if the writer's record contains a field with a name not present in the + reader's record, the writer's value for that field is ignored. + * if the reader's record schema has a field that contains a default value, + and writer's schema does not have a field with the same name, then the + reader should use the default value from its field. + * if the reader's record schema has a field with no default value, and + writer's schema does not have a field with the same name, then the + field's value is unset. + """ + # schema resolution + readers_fields_dict = readers_schema.fields_dict + read_record = {} + for field in writers_schema.fields: + readers_field = readers_fields_dict.get(field.name) + if readers_field is not None: + field_val = self.read_data(field.type, readers_field.type, decoder) + read_record[field.name] = field_val + else: + self.skip_data(field.type, decoder) + + # fill in default values + if len(readers_fields_dict) > len(read_record): + writers_fields_dict = writers_schema.fields_dict + for field_name, field in readers_fields_dict.items(): + if not writers_fields_dict.has_key(field_name): + if field.has_default: + field_val = self._read_default_value(field.type, field.default) + read_record[field.name] = field_val + else: + fail_msg = 'No default value for field %s' % field_name + raise SchemaResolutionException(fail_msg, writers_schema, + readers_schema) + return read_record + + def skip_record(self, writers_schema, decoder): + for field in writers_schema.fields: + self.skip_data(field.type, decoder) + + def _read_default_value(self, field_schema, default_value): + """ + Basically a JSON Decoder? 
+ """ + if field_schema.type == 'null': + return None + elif field_schema.type == 'boolean': + return bool(default_value) + elif field_schema.type == 'int': + return int(default_value) + elif field_schema.type == 'long': + return long(default_value) + elif field_schema.type in ['float', 'double']: + return float(default_value) + elif field_schema.type in ['enum', 'fixed', 'string', 'bytes']: + return default_value + elif field_schema.type == 'array': + read_array = [] + for json_val in default_value: + item_val = self._read_default_value(field_schema.items, json_val) + read_array.append(item_val) + return read_array + elif field_schema.type == 'map': + read_map = {} + for key, json_val in default_value.items(): + map_val = self._read_default_value(field_schema.values, json_val) + read_map[key] = map_val + return read_map + elif field_schema.type in ['union', 'error_union']: + return self._read_default_value(field_schema.schemas[0], default_value) + elif field_schema.type == 'record': + read_record = {} + for field in field_schema.fields: + json_val = default_value.get(field.name) + if json_val is None: json_val = field.default + field_val = self._read_default_value(field.type, json_val) + read_record[field.name] = field_val + return read_record + else: + fail_msg = 'Unknown type: %s' % field_schema.type + raise schema.AvroException(fail_msg) + +class DatumWriter(object): + """DatumWriter for generic python objects.""" + def __init__(self, writers_schema=None): + self._writers_schema = writers_schema + + # read/write properties + def set_writers_schema(self, writers_schema): + self._writers_schema = writers_schema + writers_schema = property(lambda self: self._writers_schema, + set_writers_schema) + + def write(self, datum, encoder): + # validate datum + if not validate(self.writers_schema, datum): + raise AvroTypeException(self.writers_schema, datum) + + self.write_data(self.writers_schema, datum, encoder) + + def write_data(self, writers_schema, datum, encoder): + # function dispatch to write datum + if writers_schema.type == 'null': + encoder.write_null(datum) + elif writers_schema.type == 'boolean': + encoder.write_boolean(datum) + elif writers_schema.type == 'string': + encoder.write_utf8(datum) + elif writers_schema.type == 'int': + encoder.write_int(datum) + elif writers_schema.type == 'long': + encoder.write_long(datum) + elif writers_schema.type == 'float': + encoder.write_float(datum) + elif writers_schema.type == 'double': + encoder.write_double(datum) + elif writers_schema.type == 'bytes': + encoder.write_bytes(datum) + elif writers_schema.type == 'fixed': + self.write_fixed(writers_schema, datum, encoder) + elif writers_schema.type == 'enum': + self.write_enum(writers_schema, datum, encoder) + elif writers_schema.type == 'array': + self.write_array(writers_schema, datum, encoder) + elif writers_schema.type == 'map': + self.write_map(writers_schema, datum, encoder) + elif writers_schema.type in ['union', 'error_union']: + self.write_union(writers_schema, datum, encoder) + elif writers_schema.type in ['record', 'error', 'request']: + self.write_record(writers_schema, datum, encoder) + else: + fail_msg = 'Unknown type: %s' % writers_schema.type + raise schema.AvroException(fail_msg) + + def write_fixed(self, writers_schema, datum, encoder): + """ + Fixed instances are encoded using the number of bytes declared + in the schema. 
+ """ + encoder.write(datum) + + def write_enum(self, writers_schema, datum, encoder): + """ + An enum is encoded by a int, representing the zero-based position + of the symbol in the schema. + """ + index_of_datum = writers_schema.symbols.index(datum) + encoder.write_int(index_of_datum) + + def write_array(self, writers_schema, datum, encoder): + """ + Arrays are encoded as a series of blocks. + + Each block consists of a long count value, + followed by that many array items. + A block with count zero indicates the end of the array. + Each item is encoded per the array's item schema. + + If a block's count is negative, + then the count is followed immediately by a long block size, + indicating the number of bytes in the block. + The actual count in this case + is the absolute value of the count written. + """ + if len(datum) > 0: + encoder.write_long(len(datum)) + for item in datum: + self.write_data(writers_schema.items, item, encoder) + encoder.write_long(0) + + def write_map(self, writers_schema, datum, encoder): + """ + Maps are encoded as a series of blocks. + + Each block consists of a long count value, + followed by that many key/value pairs. + A block with count zero indicates the end of the map. + Each item is encoded per the map's value schema. + + If a block's count is negative, + then the count is followed immediately by a long block size, + indicating the number of bytes in the block. + The actual count in this case + is the absolute value of the count written. + """ + if len(datum) > 0: + encoder.write_long(len(datum)) + for key, val in datum.items(): + encoder.write_utf8(key) + self.write_data(writers_schema.values, val, encoder) + encoder.write_long(0) + + def write_union(self, writers_schema, datum, encoder): + """ + A union is encoded by first writing a long value indicating + the zero-based position within the union of the schema of its value. + The value is then encoded per the indicated schema within the union. + """ + # resolve union + index_of_schema = -1 + for i, candidate_schema in enumerate(writers_schema.schemas): + if validate(candidate_schema, datum): + index_of_schema = i + if index_of_schema < 0: raise AvroTypeException(writers_schema, datum) + + # write data + encoder.write_long(index_of_schema) + self.write_data(writers_schema.schemas[index_of_schema], datum, encoder) + + def write_record(self, writers_schema, datum, encoder): + """ + A record is encoded by encoding the values of its fields + in the order that they are declared. In other words, a record + is encoded as just the concatenation of the encodings of its fields. + Field values are encoded per their schema. + """ + for field in writers_schema.fields: + self.write_data(field.type, datum.get(field.name), encoder) diff --git a/desktop/core/ext-py/avro-1.5.0/src/avro/ipc.py b/desktop/core/ext-py/avro-1.5.0/src/avro/ipc.py new file mode 100644 index 0000000..321887d --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/src/avro/ipc.py @@ -0,0 +1,510 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Support for inter-process calls. +""" +import httplib +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO +from avro import io +from avro import protocol +from avro import schema + +# +# Constants +# + +# Handshake schema is pulled in during build +HANDSHAKE_REQUEST_SCHEMA = schema.parse(""" +{ + "type": "record", + "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} + +""") + +HANDSHAKE_RESPONSE_SCHEMA = schema.parse(""" +{ + "type": "record", + "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "match", + "type": {"type": "enum", "name": "HandshakeMatch", + "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", + "type": ["null", "string"]}, + {"name": "serverHash", + "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} + +""") + +HANDSHAKE_REQUESTOR_WRITER = io.DatumWriter(HANDSHAKE_REQUEST_SCHEMA) +HANDSHAKE_REQUESTOR_READER = io.DatumReader(HANDSHAKE_RESPONSE_SCHEMA) +HANDSHAKE_RESPONDER_WRITER = io.DatumWriter(HANDSHAKE_RESPONSE_SCHEMA) +HANDSHAKE_RESPONDER_READER = io.DatumReader(HANDSHAKE_REQUEST_SCHEMA) + +META_SCHEMA = schema.parse('{"type": "map", "values": "bytes"}') +META_WRITER = io.DatumWriter(META_SCHEMA) +META_READER = io.DatumReader(META_SCHEMA) + +SYSTEM_ERROR_SCHEMA = schema.parse('["string"]') + +# protocol cache +REMOTE_HASHES = {} +REMOTE_PROTOCOLS = {} + +BIG_ENDIAN_INT_STRUCT = io.struct_class('!I') +BUFFER_HEADER_LENGTH = 4 +BUFFER_SIZE = 8192 + +# +# Exceptions +# + +class AvroRemoteException(schema.AvroException): + """ + Raised when an error message is sent by an Avro requestor or responder. 
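  The message carried is the error datum itself, decoded against the
  message's declared error union (or the system '["string"]' error schema).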
+ """ + def __init__(self, fail_msg=None): + schema.AvroException.__init__(self, fail_msg) + +class ConnectionClosedException(schema.AvroException): + pass + +# +# Base IPC Classes (Requestor/Responder) +# + +class BaseRequestor(object): + """Base class for the client side of a protocol interaction.""" + def __init__(self, local_protocol, transceiver): + self._local_protocol = local_protocol + self._transceiver = transceiver + self._remote_protocol = None + self._remote_hash = None + self._send_protocol = None + + # read-only properties + local_protocol = property(lambda self: self._local_protocol) + transceiver = property(lambda self: self._transceiver) + + # read/write properties + def set_remote_protocol(self, new_remote_protocol): + self._remote_protocol = new_remote_protocol + REMOTE_PROTOCOLS[self.transceiver.remote_name] = self.remote_protocol + remote_protocol = property(lambda self: self._remote_protocol, + set_remote_protocol) + + def set_remote_hash(self, new_remote_hash): + self._remote_hash = new_remote_hash + REMOTE_HASHES[self.transceiver.remote_name] = self.remote_hash + remote_hash = property(lambda self: self._remote_hash, set_remote_hash) + + def set_send_protocol(self, new_send_protocol): + self._send_protocol = new_send_protocol + send_protocol = property(lambda self: self._send_protocol, set_send_protocol) + + def request(self, message_name, request_datum): + """ + Writes a request message and reads a response or error message. + """ + # build handshake and call request + buffer_writer = StringIO() + buffer_encoder = io.BinaryEncoder(buffer_writer) + self.write_handshake_request(buffer_encoder) + self.write_call_request(message_name, request_datum, buffer_encoder) + + # send the handshake and call request; block until call response + call_request = buffer_writer.getvalue() + return self.issue_request(call_request, message_name, request_datum) + + def write_handshake_request(self, encoder): + local_hash = self.local_protocol.md5 + remote_name = self.transceiver.remote_name + remote_hash = REMOTE_HASHES.get(remote_name) + if remote_hash is None: + remote_hash = local_hash + self.remote_protocol = self.local_protocol + request_datum = {} + request_datum['clientHash'] = local_hash + request_datum['serverHash'] = remote_hash + if self.send_protocol: + request_datum['clientProtocol'] = str(self.local_protocol) + HANDSHAKE_REQUESTOR_WRITER.write(request_datum, encoder) + + def write_call_request(self, message_name, request_datum, encoder): + """ + The format of a call request is: + * request metadata, a map with values of type bytes + * the message name, an Avro string, followed by + * the message parameters. Parameters are serialized according to + the message's request declaration. 
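+
+    As an illustrative sketch (the message name 'echo' and its single
+    string parameter are assumed, not part of the upstream protocol),
+    one call could be serialized like this:
+
+      buffer_writer = StringIO()
+      buffer_encoder = io.BinaryEncoder(buffer_writer)
+      self.write_call_request('echo', {'message': 'hello'}, buffer_encoder)
+      # buffer_writer now holds an empty metadata map (a zero count),
+      # the utf-8 string 'echo', then the encoded request parameters.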
+ """ + # request metadata (not yet implemented) + request_metadata = {} + META_WRITER.write(request_metadata, encoder) + + # message name + message = self.local_protocol.messages.get(message_name) + if message is None: + raise schema.AvroException('Unknown message: %s' % message_name) + encoder.write_utf8(message.name) + + # message parameters + self.write_request(message.request, request_datum, encoder) + + def write_request(self, request_schema, request_datum, encoder): + datum_writer = io.DatumWriter(request_schema) + datum_writer.write(request_datum, encoder) + + def read_handshake_response(self, decoder): + handshake_response = HANDSHAKE_REQUESTOR_READER.read(decoder) + match = handshake_response.get('match') + if match == 'BOTH': + self.send_protocol = False + return True + elif match == 'CLIENT': + if self.send_protocol: + raise schema.AvroException('Handshake failure.') + self.remote_protocol = protocol.parse( + handshake_response.get('serverProtocol')) + self.remote_hash = handshake_response.get('serverHash') + self.send_protocol = False + return True + elif match == 'NONE': + if self.send_protocol: + raise schema.AvroException('Handshake failure.') + self.remote_protocol = protocol.parse( + handshake_response.get('serverProtocol')) + self.remote_hash = handshake_response.get('serverHash') + self.send_protocol = True + return False + else: + raise schema.AvroException('Unexpected match: %s' % match) + + def read_call_response(self, message_name, decoder): + """ + The format of a call response is: + * response metadata, a map with values of type bytes + * a one-byte error flag boolean, followed by either: + o if the error flag is false, + the message response, serialized per the message's response schema. + o if the error flag is true, + the error, serialized per the message's error union schema. 
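+
+    A sketch of the client side (the message name 'echo' and the bytes
+    in response_bytes are assumed): read_call_response either returns
+    the decoded response or raises the decoded remote error.
+
+      decoder = io.BinaryDecoder(StringIO(response_bytes))
+      try:
+        result = self.read_call_response('echo', decoder)
+      except AvroRemoteException, e:
+        print e  # the error flag was true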
+ """ + # response metadata + response_metadata = META_READER.read(decoder) + + # remote response schema + remote_message_schema = self.remote_protocol.messages.get(message_name) + if remote_message_schema is None: + raise schema.AvroException('Unknown remote message: %s' % message_name) + + # local response schema + local_message_schema = self.local_protocol.messages.get(message_name) + if local_message_schema is None: + raise schema.AvroException('Unknown local message: %s' % message_name) + + # error flag + if not decoder.read_boolean(): + writers_schema = remote_message_schema.response + readers_schema = local_message_schema.response + return self.read_response(writers_schema, readers_schema, decoder) + else: + writers_schema = remote_message_schema.errors + readers_schema = local_message_schema.errors + raise self.read_error(writers_schema, readers_schema, decoder) + + def read_response(self, writers_schema, readers_schema, decoder): + datum_reader = io.DatumReader(writers_schema, readers_schema) + result = datum_reader.read(decoder) + return result + + def read_error(self, writers_schema, readers_schema, decoder): + datum_reader = io.DatumReader(writers_schema, readers_schema) + return AvroRemoteException(datum_reader.read(decoder)) + +class Requestor(BaseRequestor): + + def issue_request(self, call_request, message_name, request_datum): + call_response = self.transceiver.transceive(call_request) + + # process the handshake and call response + buffer_decoder = io.BinaryDecoder(StringIO(call_response)) + call_response_exists = self.read_handshake_response(buffer_decoder) + if call_response_exists: + return self.read_call_response(message_name, buffer_decoder) + else: + return self.request(message_name, request_datum) + +class Responder(object): + """Base class for the server side of a protocol interaction.""" + def __init__(self, local_protocol): + self._local_protocol = local_protocol + self._local_hash = self.local_protocol.md5 + self._protocol_cache = {} + self.set_protocol_cache(self.local_hash, self.local_protocol) + + # read-only properties + local_protocol = property(lambda self: self._local_protocol) + local_hash = property(lambda self: self._local_hash) + protocol_cache = property(lambda self: self._protocol_cache) + + # utility functions to manipulate protocol cache + def get_protocol_cache(self, hash): + return self.protocol_cache.get(hash) + def set_protocol_cache(self, hash, protocol): + self.protocol_cache[hash] = protocol + + def respond(self, call_request): + """ + Called by a server to deserialize a request, compute and serialize + a response or error. Compare to 'handle()' in Thrift. 
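+
+    A transport-level sketch (the socket-like file object 'connection'
+    is assumed), using the framing helpers defined later in this module:
+
+      reader = FramedReader(connection)
+      call_request = reader.read_framed_message()
+      call_response = responder.respond(call_request)
+      FramedWriter(connection).write_framed_message(call_response)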
+    """
+    buffer_reader = StringIO(call_request)
+    buffer_decoder = io.BinaryDecoder(buffer_reader)
+    buffer_writer = StringIO()
+    buffer_encoder = io.BinaryEncoder(buffer_writer)
+    error = None
+    response_metadata = {}
+
+    try:
+      remote_protocol = self.process_handshake(buffer_decoder, buffer_encoder)
+      # handshake failure
+      if remote_protocol is None:
+        return buffer_writer.getvalue()
+
+      # read request using remote protocol
+      request_metadata = META_READER.read(buffer_decoder)
+      remote_message_name = buffer_decoder.read_utf8()
+
+      # get remote and local request schemas so we can do
+      # schema resolution (one fine day)
+      remote_message = remote_protocol.messages.get(remote_message_name)
+      if remote_message is None:
+        fail_msg = 'Unknown remote message: %s' % remote_message_name
+        raise schema.AvroException(fail_msg)
+      local_message = self.local_protocol.messages.get(remote_message_name)
+      if local_message is None:
+        fail_msg = 'Unknown local message: %s' % remote_message_name
+        raise schema.AvroException(fail_msg)
+      writers_schema = remote_message.request
+      readers_schema = local_message.request
+      request = self.read_request(writers_schema, readers_schema,
+                                  buffer_decoder)
+
+      # perform server logic
+      try:
+        response = self.invoke(local_message, request)
+      except AvroRemoteException, e:
+        error = e
+      except Exception, e:
+        error = AvroRemoteException(str(e))
+
+      # write response using local protocol
+      META_WRITER.write(response_metadata, buffer_encoder)
+      buffer_encoder.write_boolean(error is not None)
+      if error is None:
+        writers_schema = local_message.response
+        self.write_response(writers_schema, response, buffer_encoder)
+      else:
+        writers_schema = local_message.errors
+        self.write_error(writers_schema, error, buffer_encoder)
+    except schema.AvroException, e:
+      error = AvroRemoteException(str(e))
+      buffer_encoder = io.BinaryEncoder(StringIO())
+      META_WRITER.write(response_metadata, buffer_encoder)
+      buffer_encoder.write_boolean(True)
+      self.write_error(SYSTEM_ERROR_SCHEMA, error, buffer_encoder)
+    return buffer_writer.getvalue()
+
+  def process_handshake(self, decoder, encoder):
+    handshake_request = HANDSHAKE_RESPONDER_READER.read(decoder)
+    handshake_response = {}
+
+    # determine the remote protocol
+    client_hash = handshake_request.get('clientHash')
+    client_protocol = handshake_request.get('clientProtocol')
+    remote_protocol = self.get_protocol_cache(client_hash)
+    if remote_protocol is None and client_protocol is not None:
+      remote_protocol = protocol.parse(client_protocol)
+      self.set_protocol_cache(client_hash, remote_protocol)
+
+    # evaluate remote's guess of the local protocol
+    server_hash = handshake_request.get('serverHash')
+    if self.local_hash == server_hash:
+      if remote_protocol is None:
+        handshake_response['match'] = 'NONE'
+      else:
+        handshake_response['match'] = 'BOTH'
+    else:
+      if remote_protocol is None:
+        handshake_response['match'] = 'NONE'
+      else:
+        handshake_response['match'] = 'CLIENT'
+
+    if handshake_response['match'] != 'BOTH':
+      handshake_response['serverProtocol'] = str(self.local_protocol)
+      handshake_response['serverHash'] = self.local_hash
+
+    HANDSHAKE_RESPONDER_WRITER.write(handshake_response, encoder)
+    return remote_protocol
+
+  def invoke(self, local_message, request):
+    """
+    Actual work done by server: cf. handler in thrift.
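+
+    Subclasses override this. A minimal sketch (the 'echo' message and
+    its semantics are assumed, not part of the library):
+
+      class EchoResponder(Responder):
+        def invoke(self, local_message, request):
+          if local_message.name == 'echo':
+            return request['message']
+          raise AvroRemoteException('unknown message: %s' % local_message.name)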
+ """ + pass + + def read_request(self, writers_schema, readers_schema, decoder): + datum_reader = io.DatumReader(writers_schema, readers_schema) + return datum_reader.read(decoder) + + def write_response(self, writers_schema, response_datum, encoder): + datum_writer = io.DatumWriter(writers_schema) + datum_writer.write(response_datum, encoder) + + def write_error(self, writers_schema, error_exception, encoder): + datum_writer = io.DatumWriter(writers_schema) + datum_writer.write(str(error_exception), encoder) + +# +# Utility classes +# + +class FramedReader(object): + """Wrapper around a file-like object to read framed data.""" + def __init__(self, reader): + self._reader = reader + + # read-only properties + reader = property(lambda self: self._reader) + + def read_framed_message(self): + message = [] + while True: + buffer = StringIO() + buffer_length = self._read_buffer_length() + if buffer_length == 0: + return ''.join(message) + while buffer.tell() < buffer_length: + chunk = self.reader.read(buffer_length - buffer.tell()) + if chunk == '': + raise ConnectionClosedException("Reader read 0 bytes.") + buffer.write(chunk) + message.append(buffer.getvalue()) + + def _read_buffer_length(self): + read = self.reader.read(BUFFER_HEADER_LENGTH) + if read == '': + raise ConnectionClosedException("Reader read 0 bytes.") + return BIG_ENDIAN_INT_STRUCT.unpack(read)[0] + +class FramedWriter(object): + """Wrapper around a file-like object to write framed data.""" + def __init__(self, writer): + self._writer = writer + + # read-only properties + writer = property(lambda self: self._writer) + + def write_framed_message(self, message): + message_length = len(message) + total_bytes_sent = 0 + while message_length - total_bytes_sent > 0: + if message_length - total_bytes_sent > BUFFER_SIZE: + buffer_length = BUFFER_SIZE + else: + buffer_length = message_length - total_bytes_sent + self.write_buffer(message[total_bytes_sent: + (total_bytes_sent + buffer_length)]) + total_bytes_sent += buffer_length + # A message is always terminated by a zero-length buffer. + self.write_buffer_length(0) + + def write_buffer(self, chunk): + buffer_length = len(chunk) + self.write_buffer_length(buffer_length) + self.writer.write(chunk) + + def write_buffer_length(self, n): + self.writer.write(BIG_ENDIAN_INT_STRUCT.pack(n)) + +# +# Transceiver Implementations +# + +class HTTPTransceiver(object): + """ + A simple HTTP-based transceiver implementation. 
+ Useful for clients but not for servers + """ + def __init__(self, host, port): + self.conn = httplib.HTTPConnection(host, port) + self.conn.connect() + + # read-only properties + sock = property(lambda self: self.conn.sock) + remote_name = property(lambda self: self.sock.getsockname()) + + # read/write properties + def set_conn(self, new_conn): + self._conn = new_conn + conn = property(lambda self: self._conn, set_conn) + + def transceive(self, request): + self.write_framed_message(request) + result = self.read_framed_message() + return result + + def read_framed_message(self): + response = self.conn.getresponse() + response_reader = FramedReader(response) + framed_message = response_reader.read_framed_message() + response.read() # ensure we're ready for subsequent requests + return framed_message + + def write_framed_message(self, message): + req_method = 'POST' + req_resource = '/' + req_headers = {'Content-Type': 'avro/binary'} + + req_body_buffer = FramedWriter(StringIO()) + req_body_buffer.write_framed_message(message) + req_body = req_body_buffer.writer.getvalue() + + self.conn.request(req_method, req_resource, req_body, req_headers) + + def close(self): + self.conn.close() + +# +# Server Implementations (none yet) +# + diff --git a/desktop/core/ext-py/avro-1.5.0/src/avro/protocol.py b/desktop/core/ext-py/avro-1.5.0/src/avro/protocol.py new file mode 100644 index 0000000..104817e --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/src/avro/protocol.py @@ -0,0 +1,222 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Protocol implementation. +""" +try: + from hashlib import md5 +except ImportError: + from md5 import md5 +try: + import json +except ImportError: + import simplejson as json +from avro import schema + +# +# Constants +# + +# TODO(hammer): confirmed 'fixed' with Doug +VALID_TYPE_SCHEMA_TYPES = ('enum', 'record', 'error', 'fixed') + +# +# Exceptions +# + +class ProtocolParseException(schema.AvroException): + pass + +# +# Base Classes +# + +class Protocol(object): + """An application protocol.""" + def _parse_types(self, types, type_names): + type_objects = [] + for type in types: + type_object = schema.make_avsc_object(type, type_names) + if type_object.type not in VALID_TYPE_SCHEMA_TYPES: + fail_msg = 'Type %s not an enum, fixed, record, or error.' % type + raise ProtocolParseException(fail_msg) + type_objects.append(type_object) + return type_objects + + def _parse_messages(self, messages, names): + message_objects = {} + for name, body in messages.iteritems(): + if message_objects.has_key(name): + fail_msg = 'Message name "%s" repeated.' % name + raise ProtocolParseException(fail_msg) + elif not(hasattr(body, 'get') and callable(body.get)): + fail_msg = 'Message name "%s" has non-object body %s.' 
% (name, body) + raise ProtocolParseException(fail_msg) + request = body.get('request') + response = body.get('response') + errors = body.get('errors') + message_objects[name] = Message(name, request, response, errors, names) + return message_objects + + def __init__(self, name, namespace=None, types=None, messages=None): + # Ensure valid ctor args + if not name: + fail_msg = 'Protocols must have a non-empty name.' + raise ProtocolParseException(fail_msg) + elif not isinstance(name, basestring): + fail_msg = 'The name property must be a string.' + raise ProtocolParseException(fail_msg) + elif namespace is not None and not isinstance(namespace, basestring): + fail_msg = 'The namespace property must be a string.' + raise ProtocolParseException(fail_msg) + elif types is not None and not isinstance(types, list): + fail_msg = 'The types property must be a list.' + raise ProtocolParseException(fail_msg) + elif (messages is not None and + not(hasattr(messages, 'get') and callable(messages.get))): + fail_msg = 'The messages property must be a JSON object.' + raise ProtocolParseException(fail_msg) + + self._props = {} + self.set_prop('name', name) + type_names = schema.Names() + if namespace is not None: + self.set_prop('namespace', namespace) + type_names.default_namespace = namespace + if types is not None: + self.set_prop('types', self._parse_types(types, type_names)) + if messages is not None: + self.set_prop('messages', self._parse_messages(messages, type_names)) + self._md5 = md5(str(self)).digest() + + # read-only properties + name = property(lambda self: self.get_prop('name')) + namespace = property(lambda self: self.get_prop('namespace')) + fullname = property(lambda self: + schema.Name(self.name, self.namespace).fullname) + types = property(lambda self: self.get_prop('types')) + types_dict = property(lambda self: dict([(type.name, type) + for type in self.types])) + messages = property(lambda self: self.get_prop('messages')) + md5 = property(lambda self: self._md5) + props = property(lambda self: self._props) + + # utility functions to manipulate properties dict + def get_prop(self, key): + return self.props.get(key) + def set_prop(self, key, value): + self.props[key] = value + + def to_json(self): + to_dump = {} + to_dump['protocol'] = self.name + names = schema.Names() + if self.namespace: + to_dump['namespace'] = self.namespace + if self.types: + to_dump['types'] = [ t.to_json(names) for t in self.types ] + if self.messages: + messages_dict = {} + for name, body in self.messages.iteritems(): + messages_dict[name] = body.to_json(names) + to_dump['messages'] = messages_dict + return to_dump + + def __str__(self): + return json.dumps(self.to_json()) + + def __eq__(self, that): + to_cmp = json.loads(str(self)) + return to_cmp == json.loads(str(that)) + +class Message(object): + """A Protocol message.""" + def _parse_request(self, request, names): + if not isinstance(request, list): + fail_msg = 'Request property not a list: %s' % request + raise ProtocolParseException(fail_msg) + return schema.RecordSchema(None, None, request, names, 'request') + + def _parse_response(self, response, names): + if isinstance(response, basestring) and names.has_name(response, None): + return names.get_name(response, None) + else: + return schema.make_avsc_object(response, names) + + def _parse_errors(self, errors, names): + if not isinstance(errors, list): + fail_msg = 'Errors property not a list: %s' % errors + raise ProtocolParseException(fail_msg) + errors_for_parsing = {'type': 'error_union', 
'declared_errors': errors} + return schema.make_avsc_object(errors_for_parsing, names) + + def __init__(self, name, request, response, errors=None, names=None): + self._name = name + + self._props = {} + self.set_prop('request', self._parse_request(request, names)) + self.set_prop('response', self._parse_response(response, names)) + if errors is not None: + self.set_prop('errors', self._parse_errors(errors, names)) + + # read-only properties + name = property(lambda self: self._name) + request = property(lambda self: self.get_prop('request')) + response = property(lambda self: self.get_prop('response')) + errors = property(lambda self: self.get_prop('errors')) + props = property(lambda self: self._props) + + # utility functions to manipulate properties dict + def get_prop(self, key): + return self.props.get(key) + def set_prop(self, key, value): + self.props[key] = value + + def __str__(self): + return json.dumps(self.to_json(schema.Names())) + + def to_json(self, names): + to_dump = {} + to_dump['request'] = self.request.to_json(names) + to_dump['response'] = self.response.to_json(names) + if self.errors: + to_dump['errors'] = self.errors.to_json(names) + return to_dump + + def __eq__(self, that): + return self.name == that.name and self.props == that.props + +def make_avpr_object(json_data): + """Build Avro Protocol from data parsed out of JSON string.""" + if hasattr(json_data, 'get') and callable(json_data.get): + name = json_data.get('protocol') + namespace = json_data.get('namespace') + types = json_data.get('types') + messages = json_data.get('messages') + return Protocol(name, namespace, types, messages) + else: + raise ProtocolParseException('Not a JSON object: %s' % json_data) + +def parse(json_string): + """Constructs the Protocol from the JSON text.""" + try: + json_data = json.loads(json_string) + except: + raise ProtocolParseException('Error parsing JSON: %s' % json_string) + + # construct the Avro Protocol object + return make_avpr_object(json_data) + diff --git a/desktop/core/ext-py/avro-1.5.0/src/avro/schema.py b/desktop/core/ext-py/avro-1.5.0/src/avro/schema.py new file mode 100644 index 0000000..24718c0 --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/src/avro/schema.py @@ -0,0 +1,707 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Contains the Schema classes. 
+
+A schema may be one of:
+  A record, mapping field names to field value data;
+  An error, equivalent to a record;
+  An enum, containing one of a small set of symbols;
+  An array of values, all of the same schema;
+  A map containing string/value pairs, each of a declared schema;
+  A union of other schemas;
+  A fixed sized binary object;
+  A unicode string;
+  A sequence of bytes;
+  A 32-bit signed int;
+  A 64-bit signed long;
+  A 32-bit floating-point float;
+  A 64-bit floating-point double;
+  A boolean; or
+  Null.
+"""
+try:
+  import json
+except ImportError:
+  import simplejson as json
+
+#
+# Constants
+#
+
+PRIMITIVE_TYPES = (
+  'null',
+  'boolean',
+  'string',
+  'bytes',
+  'int',
+  'long',
+  'float',
+  'double',
+)
+
+NAMED_TYPES = (
+  'fixed',
+  'enum',
+  'record',
+  'error',
+)
+
+VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + (
+  'array',
+  'map',
+  'union',
+  'request',
+  'error_union'
+)
+
+RESERVED_PROPS = (
+  'type',
+  'name',
+  'namespace',
+  'fields',     # Record
+  'items',      # Array
+  'size',       # Fixed
+  'symbols',    # Enum
+  'values',     # Map
+)
+
+VALID_FIELD_SORT_ORDERS = (
+  'ascending',
+  'descending',
+  'ignore',
+)
+
+#
+# Exceptions
+#
+
+class AvroException(Exception):
+  pass
+
+class SchemaParseException(AvroException):
+  pass
+
+#
+# Base Classes
+#
+
+class Schema(object):
+  """Base class for all Schema classes."""
+  def __init__(self, type):
+    # Ensure valid ctor args
+    if not isinstance(type, basestring):
+      fail_msg = 'Schema type must be a string.'
+      raise SchemaParseException(fail_msg)
+    elif type not in VALID_TYPES:
+      fail_msg = '%s is not a valid type.' % type
+      raise SchemaParseException(fail_msg)
+
+    # add members
+    if not hasattr(self, '_props'): self._props = {}
+    self.set_prop('type', type)
+
+  # Read-only properties dict. Printing schemas
+  # creates JSON properties directly from this dict.
+  props = property(lambda self: self._props)
+  type = property(lambda self: self.get_prop('type'))
+
+  # utility functions to manipulate properties dict
+  def get_prop(self, key):
+    return self.props.get(key)
+
+  def set_prop(self, key, value):
+    self.props[key] = value
+
+  def __str__(self):
+    names = Names()
+    return json.dumps(self.to_json(names))
+
+  def to_json(self, names):
+    """
+    Converts the schema object into its AVRO specification representation.
+
+    Schema types that have names (records, enums, and fixed) must
+    be aware of not re-defining schemas that are already listed
+    in the parameter names.
+    """
+    raise Exception("Must be implemented by subclasses.")
+
+class Name(object):
+  """Class to describe Avro name."""
+
+  def __init__(self, name_attr, space_attr, default_space):
+    """
+    Formulate full name according to the specification.
+
+    @arg name_attr: name value read in schema or None.
+    @arg space_attr: namespace value read in schema or None.
+    @arg default_space: the current default space or None.
+    """
+    # Ensure valid ctor args
+    if not (isinstance(name_attr, basestring) or (name_attr is None)):
+      fail_msg = 'Name must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+    elif name_attr == "":
+      fail_msg = 'Name must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+
+    if not (isinstance(space_attr, basestring) or (space_attr is None)):
+      fail_msg = 'Space must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+    elif space_attr == "":
+      fail_msg = 'Space must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+
+    if not (isinstance(default_space, basestring) or (default_space is None)):
+      fail_msg = 'Default space must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+    elif default_space == "":
+      fail_msg = 'Default space must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+
+    self._full = None;
+
+    if name_attr is None or name_attr == "":
+      return;
+
+    if (name_attr.find('.') < 0):
+      if (space_attr is not None) and (space_attr != ""):
+        self._full = "%s.%s" % (space_attr, name_attr)
+      else:
+        if (default_space is not None) and (default_space != ""):
+          self._full = "%s.%s" % (default_space, name_attr)
+        else:
+          self._full = name_attr
+    else:
+      self._full = name_attr
+
+  def __eq__(self, other):
+    if not isinstance(other, Name):
+      return False
+    return (self.fullname == other.fullname)
+
+  fullname = property(lambda self: self._full)
+
+  def get_space(self):
+    """Back out a namespace from full name."""
+    if self._full is None:
+      return None
+
+    if (self._full.find('.') > 0):
+      return self._full.rsplit(".", 1)[0]
+    else:
+      return ""
+
+class Names(object):
+  """Track name set and default namespace during parsing."""
+  def __init__(self, default_namespace=None):
+    self.names = {}
+    self.default_namespace = default_namespace
+
+  def has_name(self, name_attr, space_attr):
+    test = Name(name_attr, space_attr, self.default_namespace).fullname
+    return self.names.has_key(test)
+
+  def get_name(self, name_attr, space_attr):
+    test = Name(name_attr, space_attr, self.default_namespace).fullname
+    if not self.names.has_key(test):
+      return None
+    return self.names[test]
+
+  def add_name(self, name_attr, space_attr, new_schema):
+    """
+    Add a new schema object to the name set.
+
+    @arg name_attr: name value read in schema
+    @arg space_attr: namespace value read in schema.
+
+    @return: the Name that was just added.
+    """
+    to_add = Name(name_attr, space_attr, self.default_namespace)
+
+    if to_add.fullname in VALID_TYPES:
+      fail_msg = '%s is a reserved type name.' % to_add.fullname
+      raise SchemaParseException(fail_msg)
+    elif self.names.has_key(to_add.fullname):
+      fail_msg = 'The name "%s" is already in use.' % to_add.fullname
+      raise SchemaParseException(fail_msg)
+
+    self.names[to_add.fullname] = new_schema
+    return to_add
+
+class NamedSchema(Schema):
+  """Named Schemas specified in NAMED_TYPES."""
+  def __init__(self, type, name, namespace=None, names=None):
+    # Ensure valid ctor args
+    if not name:
+      fail_msg = 'Named Schemas must have a non-empty name.'
+      raise SchemaParseException(fail_msg)
+    elif not isinstance(name, basestring):
+      fail_msg = 'The name property must be a string.'
+      raise SchemaParseException(fail_msg)
+    elif namespace is not None and not isinstance(namespace, basestring):
+      fail_msg = 'The namespace property must be a string.'
+ raise SchemaParseException(fail_msg) + + # Call parent ctor + Schema.__init__(self, type) + + # Add class members + new_name = names.add_name(name, namespace, self) + + # Store name and namespace as they were read in origin schema + self.set_prop('name', name) + if namespace is not None: + self.set_prop('namespace', new_name.get_space()) + + # Store full name as calculated from name, namespace + self._fullname = new_name.fullname + + def name_ref(self, names): + if self.namespace == names.default_namespace: + return self.name + else: + return self.fullname + + # read-only properties + name = property(lambda self: self.get_prop('name')) + namespace = property(lambda self: self.get_prop('namespace')) + fullname = property(lambda self: self._fullname) + +class Field(object): + def __init__(self, type, name, has_default, default=None, order=None, names=None): + # Ensure valid ctor args + if not name: + fail_msg = 'Fields must have a non-empty name.' + raise SchemaParseException(fail_msg) + elif not isinstance(name, basestring): + fail_msg = 'The name property must be a string.' + raise SchemaParseException(fail_msg) + elif order is not None and order not in VALID_FIELD_SORT_ORDERS: + fail_msg = 'The order property %s is not valid.' % order + raise SchemaParseException(fail_msg) + + # add members + self._props = {} + self._has_default = has_default + + if (isinstance(type, basestring) and names is not None + and names.has_name(type, None)): + type_schema = names.get_name(type, None) + else: + try: + type_schema = make_avsc_object(type, names) + except Exception, e: + fail_msg = 'Type property "%s" not a valid Avro schema: %s' % (type, e) + raise SchemaParseException(fail_msg) + self.set_prop('type', type_schema) + self.set_prop('name', name) + # TODO(hammer): check to ensure default is valid + if has_default: self.set_prop('default', default) + if order is not None: self.set_prop('order', order) + + # read-only properties + type = property(lambda self: self.get_prop('type')) + name = property(lambda self: self.get_prop('name')) + default = property(lambda self: self.get_prop('default')) + has_default = property(lambda self: self._has_default) + order = property(lambda self: self.get_prop('order')) + props = property(lambda self: self._props) + + # utility functions to manipulate properties dict + def get_prop(self, key): + return self.props.get(key) + def set_prop(self, key, value): + self.props[key] = value + + def to_json(self, names): + to_dump = self.props.copy() + to_dump['type'] = self.type.to_json(names) + return to_dump + + def __eq__(self, that): + to_cmp = json.loads(str(self)) + return to_cmp == json.loads(str(that)) + +# +# Primitive Types +# +class PrimitiveSchema(Schema): + """Valid primitive types are in PRIMITIVE_TYPES.""" + def __init__(self, type): + # Ensure valid ctor args + if type not in PRIMITIVE_TYPES: + raise AvroException("%s is not a valid primitive type." % type) + + # Call parent ctor + Schema.__init__(self, type) + + self.fullname = type + + def to_json(self, names): + if len(self.props) == 1: + return self.fullname + else: + return self.props + + def __eq__(self, that): + return self.props == that.props + +# +# Complex Types (non-recursive) +# + +class FixedSchema(NamedSchema): + def __init__(self, name, namespace, size, names=None): + # Ensure valid ctor args + if not isinstance(size, int): + fail_msg = 'Fixed Schema requires a valid integer for size property.' 
+      raise AvroException(fail_msg)
+
+    # Call parent ctor
+    NamedSchema.__init__(self, 'fixed', name, namespace, names)
+
+    # Add class members
+    self.set_prop('size', size)
+
+  # read-only properties
+  size = property(lambda self: self.get_prop('size'))
+
+  def to_json(self, names):
+    if self.fullname in names.names:
+      return self.name_ref(names)
+    else:
+      names.names[self.fullname] = self
+      return self.props
+
+  def __eq__(self, that):
+    return self.props == that.props
+
+class EnumSchema(NamedSchema):
+  def __init__(self, name, namespace, symbols, names=None):
+    # Ensure valid ctor args
+    if not isinstance(symbols, list):
+      fail_msg = 'Enum Schema requires a JSON array for the symbols property.'
+      raise AvroException(fail_msg)
+    elif False in [isinstance(s, basestring) for s in symbols]:
+      fail_msg = 'Enum Schema requires all symbols to be JSON strings.'
+      raise AvroException(fail_msg)
+    elif len(set(symbols)) < len(symbols):
+      fail_msg = 'Duplicate symbol: %s' % symbols
+      raise AvroException(fail_msg)
+
+    # Call parent ctor
+    NamedSchema.__init__(self, 'enum', name, namespace, names)
+
+    # Add class members
+    self.set_prop('symbols', symbols)
+
+  # read-only properties
+  symbols = property(lambda self: self.get_prop('symbols'))
+
+  def to_json(self, names):
+    if self.fullname in names.names:
+      return self.name_ref(names)
+    else:
+      names.names[self.fullname] = self
+      return self.props
+
+  def __eq__(self, that):
+    return self.props == that.props
+
+#
+# Complex Types (recursive)
+#
+
+class ArraySchema(Schema):
+  def __init__(self, items, names=None):
+    # Call parent ctor
+    Schema.__init__(self, 'array')
+    # Add class members
+
+    if isinstance(items, basestring) and names.has_name(items, None):
+      items_schema = names.get_name(items, None)
+    else:
+      try:
+        items_schema = make_avsc_object(items, names)
+      except SchemaParseException, e:
+        fail_msg = 'Items schema (%s) not a valid Avro schema: %s (known names: %s)' % (items, e, names.names.keys())
+        raise SchemaParseException(fail_msg)
+
+    self.set_prop('items', items_schema)
+
+  # read-only properties
+  items = property(lambda self: self.get_prop('items'))
+
+  def to_json(self, names):
+    to_dump = self.props.copy()
+    item_schema = self.get_prop('items')
+    to_dump['items'] = item_schema.to_json(names)
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+class MapSchema(Schema):
+  def __init__(self, values, names=None):
+    # Call parent ctor
+    Schema.__init__(self, 'map')
+
+    # Add class members
+    if isinstance(values, basestring) and names.has_name(values, None):
+      values_schema = names.get_name(values, None)
+    else:
+      try:
+        values_schema = make_avsc_object(values, names)
+      except:
+        fail_msg = 'Values schema not a valid Avro schema.'
+        raise SchemaParseException(fail_msg)
+
+    self.set_prop('values', values_schema)
+
+  # read-only properties
+  values = property(lambda self: self.get_prop('values'))
+
+  def to_json(self, names):
+    to_dump = self.props.copy()
+    to_dump['values'] = self.get_prop('values').to_json(names)
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+class UnionSchema(Schema):
+  """
+  names is a dictionary of schema objects
+  """
+  def __init__(self, schemas, names=None):
+    # Ensure valid ctor args
+    if not isinstance(schemas, list):
+      fail_msg = 'Union schema requires a list of schemas.'
+ raise SchemaParseException(fail_msg) + + # Call parent ctor + Schema.__init__(self, 'union') + + # Add class members + schema_objects = [] + for schema in schemas: + if isinstance(schema, basestring) and names.has_name(schema, None): + new_schema = names.get_name(schema, None) + else: + try: + new_schema = make_avsc_object(schema, names) + except Exception, e: + raise SchemaParseException('Union item must be a valid Avro schema: %s' % str(e)) + # check the new schema + if (new_schema.type in VALID_TYPES and new_schema.type not in NAMED_TYPES + and new_schema.type in [schema.type for schema in schema_objects]): + raise SchemaParseException('%s type already in Union' % new_schema.type) + elif new_schema.type == 'union': + raise SchemaParseException('Unions cannot contain other unions.') + else: + schema_objects.append(new_schema) + self._schemas = schema_objects + + # read-only properties + schemas = property(lambda self: self._schemas) + + def to_json(self, names): + to_dump = [] + for schema in self.schemas: + to_dump.append(schema.to_json(names)) + return to_dump + + def __eq__(self, that): + to_cmp = json.loads(str(self)) + return to_cmp == json.loads(str(that)) + +class ErrorUnionSchema(UnionSchema): + def __init__(self, schemas, names=None): + # Prepend "string" to handle system errors + UnionSchema.__init__(self, ['string'] + schemas, names) + + def to_json(self, names): + to_dump = [] + for schema in self.schemas: + # Don't print the system error schema + if schema.type == 'string': continue + to_dump.append(schema.to_json(names)) + return to_dump + +class RecordSchema(NamedSchema): + @staticmethod + def make_field_objects(field_data, names): + """We're going to need to make message parameters too.""" + field_objects = [] + field_names = [] + for i, field in enumerate(field_data): + if hasattr(field, 'get') and callable(field.get): + type = field.get('type') + name = field.get('name') + + # null values can have a default value of None + has_default = False + default = None + if field.has_key('default'): + has_default = True + default = field.get('default') + + order = field.get('order') + new_field = Field(type, name, has_default, default, order, names) + # make sure field name has not been used yet + if new_field.name in field_names: + fail_msg = 'Field name %s already in use.' % new_field.name + raise SchemaParseException(fail_msg) + field_names.append(new_field.name) + else: + raise SchemaParseException('Not a valid field: %s' % field) + field_objects.append(new_field) + return field_objects + + def __init__(self, name, namespace, fields, names=None, schema_type='record'): + # Ensure valid ctor args + if fields is None: + fail_msg = 'Record schema requires a non-empty fields property.' + raise SchemaParseException(fail_msg) + elif not isinstance(fields, list): + fail_msg = 'Fields property must be a list of Avro schemas.' 
+ raise SchemaParseException(fail_msg) + + # Call parent ctor (adds own name to namespace, too) + if schema_type == 'request': + Schema.__init__(self, schema_type) + else: + NamedSchema.__init__(self, schema_type, name, namespace, names) + + if schema_type == 'record': + old_default = names.default_namespace + names.default_namespace = Name(name, namespace, + names.default_namespace).get_space() + + # Add class members + field_objects = RecordSchema.make_field_objects(fields, names) + self.set_prop('fields', field_objects) + + if schema_type == 'record': + names.default_namespace = old_default + + # read-only properties + fields = property(lambda self: self.get_prop('fields')) + + @property + def fields_dict(self): + fields_dict = {} + for field in self.fields: + fields_dict[field.name] = field + return fields_dict + + def to_json(self, names): + # Request records don't have names + if self.type == 'request': + return [ f.to_json(names) for f in self.fields ] + + if self.fullname in names.names: + return self.name_ref(names) + else: + names.names[self.fullname] = self + + to_dump = self.props.copy() + to_dump['fields'] = [ f.to_json(names) for f in self.fields ] + return to_dump + + def __eq__(self, that): + to_cmp = json.loads(str(self)) + return to_cmp == json.loads(str(that)) + +# +# Module Methods +# + +# TODO(hammer): handle non-reserved properties +def make_avsc_object(json_data, names=None): + """ + Build Avro Schema from data parsed out of JSON string. + + @arg names: A Name object (tracks seen names and default space) + """ + if names == None: + names = Names() + + # JSON object (non-union) + if hasattr(json_data, 'get') and callable(json_data.get): + type = json_data.get('type') + if type in PRIMITIVE_TYPES: + return PrimitiveSchema(type) + elif type in NAMED_TYPES: + name = json_data.get('name') + namespace = json_data.get('namespace') + if type == 'fixed': + size = json_data.get('size') + return FixedSchema(name, namespace, size, names) + elif type == 'enum': + symbols = json_data.get('symbols') + return EnumSchema(name, namespace, symbols, names) + elif type in ['record', 'error']: + fields = json_data.get('fields') + return RecordSchema(name, namespace, fields, names, type) + else: + raise SchemaParseException('Unknown Named Type: %s' % type) + elif type in VALID_TYPES: + if type == 'array': + items = json_data.get('items') + return ArraySchema(items, names) + elif type == 'map': + values = json_data.get('values') + return MapSchema(values, names) + elif type == 'error_union': + declared_errors = json_data.get('declared_errors') + return ErrorUnionSchema(declared_errors, names) + else: + raise SchemaParseException('Unknown Valid Type: %s' % type) + elif type is None: + raise SchemaParseException('No "type" property: %s' % json_data) + else: + raise SchemaParseException('Undefined type: %s' % type) + # JSON array (union) + elif isinstance(json_data, list): + return UnionSchema(json_data, names) + # JSON string (primitive) + elif json_data in PRIMITIVE_TYPES: + return PrimitiveSchema(json_data) + # not for us! + else: + fail_msg = "Could not make an Avro Schema object from %s." % json_data + raise SchemaParseException(fail_msg) + +# TODO(hammer): make method for reading from a file? 
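+# Illustrative usage (a sketch, not part of the upstream module):
+#
+#   >>> s = parse('{"type": "array", "items": "long"}')
+#   >>> s.type
+#   'array'
+#   >>> s.items.type
+#   'long'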
+def parse(json_string): + """Constructs the Schema from the JSON text.""" + # TODO(hammer): preserve stack trace from JSON parse + # parse the JSON + try: + json_data = json.loads(json_string) + except: + raise SchemaParseException('Error parsing JSON: %s' % json_string) + + # Initialize the names object + names = Names() + + # construct the Avro Schema object + return make_avsc_object(json_data, names) diff --git a/desktop/core/ext-py/avro-1.5.0/src/avro/tool.py b/desktop/core/ext-py/avro-1.5.0/src/avro/tool.py new file mode 100644 index 0000000..edd6f18 --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/src/avro/tool.py @@ -0,0 +1,160 @@ +#! /usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Command-line tool + +NOTE: The API for the command-line tool is experimental. +""" +import sys +from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler +import urlparse +from avro import io +from avro import datafile +from avro import protocol +from avro import ipc + +class GenericResponder(ipc.Responder): + def __init__(self, proto, msg, datum): + proto_json = file(proto, 'r').read() + ipc.Responder.__init__(self, protocol.parse(proto_json)) + self.msg = msg + self.datum = datum + + def invoke(self, message, request): + if message.name == self.msg: + print >> sys.stderr, "Message: %s Datum: %s" % (message.name, self.datum) + # server will shut down after processing a single Avro request + global server_should_shutdown + server_should_shutdown = True + return self.datum + +class GenericHandler(BaseHTTPRequestHandler): + def do_POST(self): + self.responder = responder + call_request_reader = ipc.FramedReader(self.rfile) + call_request = call_request_reader.read_framed_message() + resp_body = self.responder.respond(call_request) + self.send_response(200) + self.send_header('Content-Type', 'avro/binary') + self.end_headers() + resp_writer = ipc.FramedWriter(self.wfile) + resp_writer.write_framed_message(resp_body) + if server_should_shutdown: + print >> sys.stderr, "Shutting down server." + self.server.force_stop() + +class StoppableHTTPServer(HTTPServer): + """HTTPServer.shutdown added in Python 2.6. 
FML.""" + stopped = False + allow_reuse_address = True + def __init__(self, *args, **kw): + HTTPServer.__init__(self, *args, **kw) + self.allow_reuse_address = True + + def serve_forever(self): + while not self.stopped: + self.handle_request() + + def force_stop(self): + self.server_close() + self.stopped = True + self.serve_forever() + +def run_server(uri, proto, msg, datum): + url_obj = urlparse.urlparse(uri) + server_addr = (url_obj.hostname, url_obj.port) + global responder + global server_should_shutdown + server_should_shutdown = False + responder = GenericResponder(proto, msg, datum) + server = StoppableHTTPServer(server_addr, GenericHandler) + print "Port: %s" % server.server_port + sys.stdout.flush() + server.allow_reuse_address = True + print >> sys.stderr, "Starting server." + server.serve_forever() + +def send_message(uri, proto, msg, datum): + url_obj = urlparse.urlparse(uri) + client = ipc.HTTPTransceiver(url_obj.hostname, url_obj.port) + proto_json = file(proto, 'r').read() + requestor = ipc.Requestor(protocol.parse(proto_json), client) + print requestor.request(msg, datum) + +def file_or_stdin(f): + if f == "-": + return sys.stdin + else: + return file(f) + +def main(args=sys.argv): + if len(args) == 1: + print "Usage: %s [dump|rpcreceive|rpcsend]" % args[0] + return 1 + + if args[1] == "dump": + if len(args) != 3: + print "Usage: %s dump input_file" % args[0] + return 1 + for d in datafile.DataFileReader(file_or_stdin(args[2]), io.DatumReader()): + print repr(d) + elif args[1] == "rpcreceive": + usage_str = "Usage: %s rpcreceive uri protocol_file " % args[0] + usage_str += "message_name (-data d | -file f)" + if len(args) not in [5, 7]: + print usage_str + return 1 + uri, proto, msg = args[2:5] + datum = None + if len(args) > 5: + if args[5] == "-file": + reader = open(args[6], 'rb') + datum_reader = io.DatumReader() + dfr = datafile.DataFileReader(reader, datum_reader) + datum = dfr.next() + elif args[5] == "-data": + print "JSON Decoder not yet implemented." + return 1 + else: + print usage_str + return 1 + run_server(uri, proto, msg, datum) + elif args[1] == "rpcsend": + usage_str = "Usage: %s rpcsend uri protocol_file " % args[0] + usage_str += "message_name (-data d | -file f)" + if len(args) not in [5, 7]: + print usage_str + return 1 + uri, proto, msg = args[2:5] + datum = None + if len(args) > 5: + if args[5] == "-file": + reader = open(args[6], 'rb') + datum_reader = io.DatumReader() + dfr = datafile.DataFileReader(reader, datum_reader) + datum = dfr.next() + elif args[5] == "-data": + print "JSON Decoder not yet implemented." + return 1 + else: + print usage_str + return 1 + send_message(uri, proto, msg, datum) + return 0 + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/desktop/core/ext-py/avro-1.5.0/src/avro/txipc.py b/desktop/core/ext-py/avro-1.5.0/src/avro/txipc.py new file mode 100644 index 0000000..6a4d8b7 --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/src/avro/txipc.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+try:
+  from cStringIO import StringIO
+except ImportError:
+  from StringIO import StringIO
+from avro import ipc
+from avro import io
+
+from zope.interface import implements
+
+from twisted.web.client import Agent
+from twisted.web.http_headers import Headers
+from twisted.internet.defer import maybeDeferred, Deferred
+from twisted.web.iweb import IBodyProducer
+from twisted.web import resource, server
+from twisted.internet.protocol import Protocol
+
+class TwistedRequestor(ipc.BaseRequestor):
+  """A Twisted-compatible requestor. Returns a Deferred that will fire with the
+  returning value, instead of blocking until the request completes."""
+  def _process_handshake(self, call_response, message_name, request_datum):
+    # process the handshake and call response
+    buffer_decoder = io.BinaryDecoder(StringIO(call_response))
+    call_response_exists = self.read_handshake_response(buffer_decoder)
+    if call_response_exists:
+      return self.read_call_response(message_name, buffer_decoder)
+    else:
+      return self.request(message_name, request_datum)
+
+  def issue_request(self, call_request, message_name, request_datum):
+    d = self.transceiver.transceive(call_request)
+    d.addCallback(self._process_handshake, message_name, request_datum)
+    return d
+
+class RequestStreamingProducer(object):
+  """A streaming producer for issuing requests with the Twisted.web Agent."""
+  implements(IBodyProducer)
+
+  paused = False
+  stopped = False
+  started = False
+
+  def __init__(self, message):
+    self._message = message
+    self._length = len(message)
+    # We need a buffer length header for every buffer and an additional
+    # zero-length buffer as the message terminator
+    self._length += (self._length / ipc.BUFFER_SIZE + 2) \
+      * ipc.BUFFER_HEADER_LENGTH
+    self._total_bytes_sent = 0
+    self._deferred = Deferred()
+
+  # read-only properties
+  message = property(lambda self: self._message)
+  length = property(lambda self: self._length)
+  consumer = property(lambda self: self._consumer)
+  deferred = property(lambda self: self._deferred)
+
+  def _get_total_bytes_sent(self):
+    return self._total_bytes_sent
+
+  def _set_total_bytes_sent(self, bytes_sent):
+    self._total_bytes_sent = bytes_sent
+
+  total_bytes_sent = property(_get_total_bytes_sent, _set_total_bytes_sent)
+
+  def startProducing(self, consumer):
+    if self.started:
+      return
+
+    self.started = True
+    self._consumer = consumer
+    # Keep writing data to the consumer until we're finished,
+    # paused (pauseProducing()) or stopped (stopProducing())
+    while self.length - self.total_bytes_sent > 0 and \
+      not self.paused and not self.stopped:
+      self.write()
+    # self.write will fire this deferred once it has written
+    # the entire message to the consumer
+    return self.deferred
+
+  def resumeProducing(self):
+    self.paused = False
+    self.write()
+
+  def pauseProducing(self):
+    self.paused = True
+
+  def stopProducing(self):
+    self.stopped = True
+
+  def write(self):
+    if self.length - self.total_bytes_sent > ipc.BUFFER_SIZE:
+      buffer_length = ipc.BUFFER_SIZE
+    else:
+      buffer_length = self.length - self.total_bytes_sent
+
self.write_buffer(self.message[self.total_bytes_sent: + (self.total_bytes_sent + buffer_length)]) + self.total_bytes_sent += buffer_length + # Make sure we wrote the entire message + if self.total_bytes_sent == self.length and not self.stopped: + self.stopProducing() + # A message is always terminated by a zero-length buffer. + self.write_buffer_length(0) + self.deferred.callback(None) + + def write_buffer(self, chunk): + buffer_length = len(chunk) + self.write_buffer_length(buffer_length) + self.consumer.write(chunk) + + def write_buffer_length(self, n): + self.consumer.write(ipc.BIG_ENDIAN_INT_STRUCT.pack(n)) + +class AvroProtocol(Protocol): + + recvd = '' + done = False + + def __init__(self, finished): + self.finished = finished + self.message = [] + + def dataReceived(self, data): + self.recvd = self.recvd + data + while len(self.recvd) >= ipc.BUFFER_HEADER_LENGTH: + buffer_length ,= ipc.BIG_ENDIAN_INT_STRUCT.unpack( + self.recvd[:ipc.BUFFER_HEADER_LENGTH]) + if buffer_length == 0: + response = ''.join(self.message) + self.done = True + self.finished.callback(response) + break + if len(self.recvd) < buffer_length + ipc.BUFFER_HEADER_LENGTH: + break + buffer = self.recvd[ipc.BUFFER_HEADER_LENGTH:buffer_length + ipc.BUFFER_HEADER_LENGTH] + self.recvd = self.recvd[buffer_length + ipc.BUFFER_HEADER_LENGTH:] + self.message.append(buffer) + + def connectionLost(self, reason): + if not self.done: + self.finished.errback(ipc.ConnectionClosedException("Reader read 0 bytes.")) + +class TwistedHTTPTransceiver(object): + """This transceiver uses the Agent class present in Twisted.web >= 9.0 + for issuing requests to the remote endpoint.""" + def __init__(self, host, port, remote_name=None, reactor=None): + self.url = "http://%s:%d/" % (host, port) + + if remote_name is None: + # There's no easy way to get this peer's remote address + # in Twisted so I use a random UUID to identify ourselves + import uuid + self.remote_name = uuid.uuid4() + + if reactor is None: + from twisted.internet import reactor + self.agent = Agent(reactor) + + def read_framed_message(self, response): + finished = Deferred() + response.deliverBody(AvroProtocol(finished)) + return finished + + def transceive(self, request): + req_method = 'POST' + req_headers = { + 'Content-Type': ['avro/binary'], + 'Accept-Encoding': ['identity'], + } + + body_producer = RequestStreamingProducer(request) + d = self.agent.request( + req_method, + self.url, + headers=Headers(req_headers), + bodyProducer=body_producer) + return d.addCallback(self.read_framed_message) + +class AvroResponderResource(resource.Resource): + """This Twisted.web resource can be placed anywhere in a URL hierarchy + to provide an Avro endpoint. 
Different Avro protocols can be served + by the same web server as long as they are in different resources in + a URL hierarchy.""" + isLeaf = True + + def __init__(self, responder): + resource.Resource.__init__(self) + self.responder = responder + + def cb_render_POST(self, resp_body, request): + request.setResponseCode(200) + request.setHeader('Content-Type', 'avro/binary') + resp_writer = ipc.FramedWriter(request) + resp_writer.write_framed_message(resp_body) + request.finish() + + def render_POST(self, request): + # Unfortunately, Twisted.web doesn't support incoming + # streamed input yet, the whole payload must be kept in-memory + request.content.seek(0, 0) + call_request_reader = ipc.FramedReader(request.content) + call_request = call_request_reader.read_framed_message() + d = maybeDeferred(self.responder.respond, call_request) + d.addCallback(self.cb_render_POST, request) + return server.NOT_DONE_YET diff --git a/desktop/core/ext-py/avro-1.5.0/test/test_datafile.py b/desktop/core/ext-py/avro-1.5.0/test/test_datafile.py new file mode 100644 index 0000000..2f6f550 --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/test/test_datafile.py @@ -0,0 +1,149 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
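+# The tests below exercise the write/read round trip. The basic shape of
+# each case (a sketch; path, json_schema and datum vary per test) is:
+#
+#   dfw = datafile.DataFileWriter(open(path, 'wb'), io.DatumWriter(),
+#                                 schema.parse(json_schema))
+#   dfw.append(datum)
+#   dfw.close()
+#   for read_datum in datafile.DataFileReader(open(path, 'rb'),
+#                                             io.DatumReader()):
+#     assert read_datum == datum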
+import os +import unittest +from avro import schema +from avro import io +from avro import datafile + +SCHEMAS_TO_VALIDATE = ( + ('"null"', None), + ('"boolean"', True), + ('"string"', unicode('adsfasdf09809dsf-=adsf')), + ('"bytes"', '12345abcd'), + ('"int"', 1234), + ('"long"', 1234), + ('"float"', 1234.0), + ('"double"', 1234.0), + ('{"type": "fixed", "name": "Test", "size": 1}', 'B'), + ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'), + ('{"type": "array", "items": "long"}', [1, 3, 2]), + ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}), + ('["string", "null", "long"]', None), + ("""\ + {"type": "record", + "name": "Test", + "fields": [{"name": "f", "type": "long"}]} + """, {'f': 5}), + ("""\ + {"type": "record", + "name": "Lisp", + "fields": [{"name": "value", + "type": ["null", "string", + {"type": "record", + "name": "Cons", + "fields": [{"name": "car", "type": "Lisp"}, + {"name": "cdr", "type": "Lisp"}]}]}]} + """, {'value': {'car': {'value': 'head'}, 'cdr': {'value': None}}}), +) + +FILENAME = 'test_datafile.out' +CODECS_TO_VALIDATE = ('null', 'deflate') + +# TODO(hammer): clean up written files with ant, not os.remove +class TestDataFile(unittest.TestCase): + def test_round_trip(self): + print '' + print 'TEST ROUND TRIP' + print '===============' + print '' + correct = 0 + for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE): + for codec in CODECS_TO_VALIDATE: + print '' + print 'SCHEMA NUMBER %d' % (i + 1) + print '================' + print '' + print 'Schema: %s' % example_schema + print 'Datum: %s' % datum + print 'Codec: %s' % codec + + # write data in binary to file 10 times + writer = open(FILENAME, 'wb') + datum_writer = io.DatumWriter() + schema_object = schema.parse(example_schema) + dfw = datafile.DataFileWriter(writer, datum_writer, schema_object, codec=codec) + for i in range(10): + dfw.append(datum) + dfw.close() + + # read data in binary from file + reader = open(FILENAME, 'rb') + datum_reader = io.DatumReader() + dfr = datafile.DataFileReader(reader, datum_reader) + round_trip_data = [] + for datum in dfr: + round_trip_data.append(datum) + + print 'Round Trip Data: %s' % round_trip_data + print 'Round Trip Data Length: %d' % len(round_trip_data) + is_correct = [datum] * 10 == round_trip_data + if is_correct: correct += 1 + print 'Correct Round Trip: %s' % is_correct + print '' + os.remove(FILENAME) + self.assertEquals(correct, len(CODECS_TO_VALIDATE)*len(SCHEMAS_TO_VALIDATE)) + + def test_append(self): + print '' + print 'TEST APPEND' + print '===========' + print '' + correct = 0 + for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE): + for codec in CODECS_TO_VALIDATE: + print '' + print 'SCHEMA NUMBER %d' % (i + 1) + print '================' + print '' + print 'Schema: %s' % example_schema + print 'Datum: %s' % datum + print 'Codec: %s' % codec + + # write data in binary to file once + writer = open(FILENAME, 'wb') + datum_writer = io.DatumWriter() + schema_object = schema.parse(example_schema) + dfw = datafile.DataFileWriter(writer, datum_writer, schema_object, codec=codec) + dfw.append(datum) + dfw.close() + + # open file, write, and close nine times + for i in range(9): + writer = open(FILENAME, 'ab+') + dfw = datafile.DataFileWriter(writer, io.DatumWriter()) + dfw.append(datum) + dfw.close() + + # read data in binary from file + reader = open(FILENAME, 'rb') + datum_reader = io.DatumReader() + dfr = datafile.DataFileReader(reader, datum_reader) + appended_data = [] + for datum in dfr: + 
appended_data.append(datum) + + print 'Appended Data: %s' % appended_data + print 'Appended Data Length: %d' % len(appended_data) + is_correct = [datum] * 10 == appended_data + if is_correct: correct += 1 + print 'Correct Appended: %s' % is_correct + print '' + os.remove(FILENAME) + self.assertEquals(correct, len(CODECS_TO_VALIDATE)*len(SCHEMAS_TO_VALIDATE)) + +if __name__ == '__main__': + unittest.main() diff --git a/desktop/core/ext-py/avro-1.5.0/test/test_datafile_interop.py b/desktop/core/ext-py/avro-1.5.0/test/test_datafile_interop.py new file mode 100644 index 0000000..d4618b4 --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/test/test_datafile_interop.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import unittest +from avro import io +from avro import datafile + +class TestDataFileInterop(unittest.TestCase): + def test_interop(self): + print '' + print 'TEST INTEROP' + print '============' + print '' + for f in os.listdir('/home/cutting/src/avro/release-1.5.0-rc2/lang/py/../../build/interop/data'): + print 'READING %s' % f + print '' + + # read data in binary from file + reader = open(os.path.join('/home/cutting/src/avro/release-1.5.0-rc2/lang/py/../../build/interop/data', f), 'rb') + datum_reader = io.DatumReader() + dfr = datafile.DataFileReader(reader, datum_reader) + for datum in dfr: + assert datum is not None + +if __name__ == '__main__': + unittest.main() diff --git a/desktop/core/ext-py/avro-1.5.0/test/test_io.py b/desktop/core/ext-py/avro-1.5.0/test/test_io.py new file mode 100644 index 0000000..05a6f80 --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/test/test_io.py @@ -0,0 +1,337 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
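+"""
+Unit tests for avro.io: datum validation, the zig-zag varint encoding of
+ints and longs, skip logic, and schema resolution (promotion, default
+values, projection, and field order).
+"""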
+import unittest +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO +from binascii import hexlify +from avro import schema +from avro import io + +SCHEMAS_TO_VALIDATE = ( + ('"null"', None), + ('"boolean"', True), + ('"string"', unicode('adsfasdf09809dsf-=adsf')), + ('"bytes"', '12345abcd'), + ('"int"', 1234), + ('"long"', 1234), + ('"float"', 1234.0), + ('"double"', 1234.0), + ('{"type": "fixed", "name": "Test", "size": 1}', 'B'), + ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'), + ('{"type": "array", "items": "long"}', [1, 3, 2]), + ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}), + ('["string", "null", "long"]', None), + ("""\ + {"type": "record", + "name": "Test", + "fields": [{"name": "f", "type": "long"}]} + """, {'f': 5}), + ("""\ + {"type": "record", + "name": "Lisp", + "fields": [{"name": "value", + "type": ["null", "string", + {"type": "record", + "name": "Cons", + "fields": [{"name": "car", "type": "Lisp"}, + {"name": "cdr", "type": "Lisp"}]}]}]} + """, {'value': {'car': {'value': 'head'}, 'cdr': {'value': None}}}), +) + +BINARY_ENCODINGS = ( + (0, '00'), + (-1, '01'), + (1, '02'), + (-2, '03'), + (2, '04'), + (-64, '7f'), + (64, '80 01'), + (8192, '80 80 01'), + (-8193, '81 80 01'), +) + +DEFAULT_VALUE_EXAMPLES = ( + ('"null"', 'null', None), + ('"boolean"', 'true', True), + ('"string"', '"foo"', u'foo'), + ('"bytes"', '"\u00FF\u00FF"', u'\xff\xff'), + ('"int"', '5', 5), + ('"long"', '5', 5L), + ('"float"', '1.1', 1.1), + ('"double"', '1.1', 1.1), + ('{"type": "fixed", "name": "F", "size": 2}', '"\u00FF\u00FF"', u'\xff\xff'), + ('{"type": "enum", "name": "F", "symbols": ["FOO", "BAR"]}', '"FOO"', 'FOO'), + ('{"type": "array", "items": "int"}', '[1, 2, 3]', [1, 2, 3]), + ('{"type": "map", "values": "int"}', '{"a": 1, "b": 2}', {'a': 1, 'b': 2}), + ('["int", "null"]', '5', 5), + ('{"type": "record", "name": "F", "fields": [{"name": "A", "type": "int"}]}', + '{"A": 5}', {'A': 5}), +) + +LONG_RECORD_SCHEMA = schema.parse("""\ + {"type": "record", + "name": "Test", + "fields": [{"name": "A", "type": "int"}, + {"name": "B", "type": "int"}, + {"name": "C", "type": "int"}, + {"name": "D", "type": "int"}, + {"name": "E", "type": "int"}, + {"name": "F", "type": "int"}, + {"name": "G", "type": "int"}]}""") + +LONG_RECORD_DATUM = {'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7} + +def avro_hexlify(reader): + """Return the hex value, as a string, of a binary-encoded int or long.""" + bytes = [] + current_byte = reader.read(1) + bytes.append(hexlify(current_byte)) + while (ord(current_byte) & 0x80) != 0: + current_byte = reader.read(1) + bytes.append(hexlify(current_byte)) + return ' '.join(bytes) + +def print_test_name(test_name): + print '' + print test_name + print '=' * len(test_name) + print '' + +def write_datum(datum, writers_schema): + writer = StringIO() + encoder = io.BinaryEncoder(writer) + datum_writer = io.DatumWriter(writers_schema) + datum_writer.write(datum, encoder) + return writer, encoder, datum_writer + +def read_datum(buffer, writers_schema, readers_schema=None): + reader = StringIO(buffer.getvalue()) + decoder = io.BinaryDecoder(reader) + datum_reader = io.DatumReader(writers_schema, readers_schema) + return datum_reader.read(decoder) + +def check_binary_encoding(number_type): + print_test_name('TEST BINARY %s ENCODING' % number_type.upper()) + correct = 0 + for datum, hex_encoding in BINARY_ENCODINGS: + print 'Datum: %d' % datum + print 'Correct Encoding: %s' % hex_encoding + + 
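# encode the datum using this numeric schema, then hexlify the raw
+      # zig-zag varint bytes for comparison against the expected encoding
+      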
writers_schema = schema.parse('"%s"' % number_type.lower()) + writer, encoder, datum_writer = write_datum(datum, writers_schema) + writer.seek(0) + hex_val = avro_hexlify(writer) + + print 'Read Encoding: %s' % hex_val + if hex_encoding == hex_val: correct += 1 + print '' + return correct + +def check_skip_number(number_type): + print_test_name('TEST SKIP %s' % number_type.upper()) + correct = 0 + for value_to_skip, hex_encoding in BINARY_ENCODINGS: + VALUE_TO_READ = 6253 + print 'Value to Skip: %d' % value_to_skip + + # write the value to skip and a known value + writers_schema = schema.parse('"%s"' % number_type.lower()) + writer, encoder, datum_writer = write_datum(value_to_skip, writers_schema) + datum_writer.write(VALUE_TO_READ, encoder) + + # skip the value + reader = StringIO(writer.getvalue()) + decoder = io.BinaryDecoder(reader) + decoder.skip_long() + + # read data from string buffer + datum_reader = io.DatumReader(writers_schema) + read_value = datum_reader.read(decoder) + + print 'Read Value: %d' % read_value + if read_value == VALUE_TO_READ: correct += 1 + print '' + return correct + +class TestIO(unittest.TestCase): + # + # BASIC FUNCTIONALITY + # + + def test_validate(self): + print_test_name('TEST VALIDATE') + passed = 0 + for example_schema, datum in SCHEMAS_TO_VALIDATE: + print 'Schema: %s' % example_schema + print 'Datum: %s' % datum + validated = io.validate(schema.parse(example_schema), datum) + print 'Valid: %s' % validated + if validated: passed += 1 + self.assertEquals(passed, len(SCHEMAS_TO_VALIDATE)) + + def test_round_trip(self): + print_test_name('TEST ROUND TRIP') + correct = 0 + for example_schema, datum in SCHEMAS_TO_VALIDATE: + print 'Schema: %s' % example_schema + print 'Datum: %s' % datum + + writers_schema = schema.parse(example_schema) + writer, encoder, datum_writer = write_datum(datum, writers_schema) + round_trip_datum = read_datum(writer, writers_schema) + + print 'Round Trip Datum: %s' % round_trip_datum + if datum == round_trip_datum: correct += 1 + self.assertEquals(correct, len(SCHEMAS_TO_VALIDATE)) + + # + # BINARY ENCODING OF INT AND LONG + # + + def test_binary_int_encoding(self): + correct = check_binary_encoding('int') + self.assertEquals(correct, len(BINARY_ENCODINGS)) + + def test_binary_long_encoding(self): + correct = check_binary_encoding('long') + self.assertEquals(correct, len(BINARY_ENCODINGS)) + + def test_skip_int(self): + correct = check_skip_number('int') + self.assertEquals(correct, len(BINARY_ENCODINGS)) + + def test_skip_long(self): + correct = check_skip_number('long') + self.assertEquals(correct, len(BINARY_ENCODINGS)) + + # + # SCHEMA RESOLUTION + # + + def test_schema_promotion(self): + print_test_name('TEST SCHEMA PROMOTION') + # note that checking writers_schema.type in read_data + # allows us to handle promotion correctly + promotable_schemas = ['"int"', '"long"', '"float"', '"double"'] + incorrect = 0 + for i, ws in enumerate(promotable_schemas): + writers_schema = schema.parse(ws) + datum_to_write = 219 + for rs in promotable_schemas[i + 1:]: + readers_schema = schema.parse(rs) + writer, enc, dw = write_datum(datum_to_write, writers_schema) + datum_read = read_datum(writer, writers_schema, readers_schema) + print 'Writer: %s Reader: %s' % (writers_schema, readers_schema) + print 'Datum Read: %s' % datum_read + if datum_read != datum_to_write: incorrect += 1 + self.assertEquals(incorrect, 0) + + def test_unknown_symbol(self): + print_test_name('TEST UNKNOWN SYMBOL') + writers_schema = schema.parse("""\ + {"type": 
"enum", "name": "Test", + "symbols": ["FOO", "BAR"]}""") + datum_to_write = 'FOO' + + readers_schema = schema.parse("""\ + {"type": "enum", "name": "Test", + "symbols": ["BAR", "BAZ"]}""") + + writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema) + reader = StringIO(writer.getvalue()) + decoder = io.BinaryDecoder(reader) + datum_reader = io.DatumReader(writers_schema, readers_schema) + self.assertRaises(io.SchemaResolutionException, datum_reader.read, decoder) + + def test_default_value(self): + print_test_name('TEST DEFAULT VALUE') + writers_schema = LONG_RECORD_SCHEMA + datum_to_write = LONG_RECORD_DATUM + + correct = 0 + for field_type, default_json, default_datum in DEFAULT_VALUE_EXAMPLES: + readers_schema = schema.parse("""\ + {"type": "record", "name": "Test", + "fields": [{"name": "H", "type": %s, "default": %s}]} + """ % (field_type, default_json)) + datum_to_read = {'H': default_datum} + + writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema) + datum_read = read_datum(writer, writers_schema, readers_schema) + print 'Datum Read: %s' % datum_read + if datum_to_read == datum_read: correct += 1 + self.assertEquals(correct, len(DEFAULT_VALUE_EXAMPLES)) + + def test_no_default_value(self): + print_test_name('TEST NO DEFAULT VALUE') + writers_schema = LONG_RECORD_SCHEMA + datum_to_write = LONG_RECORD_DATUM + + readers_schema = schema.parse("""\ + {"type": "record", "name": "Test", + "fields": [{"name": "H", "type": "int"}]}""") + + writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema) + reader = StringIO(writer.getvalue()) + decoder = io.BinaryDecoder(reader) + datum_reader = io.DatumReader(writers_schema, readers_schema) + self.assertRaises(io.SchemaResolutionException, datum_reader.read, decoder) + + def test_projection(self): + print_test_name('TEST PROJECTION') + writers_schema = LONG_RECORD_SCHEMA + datum_to_write = LONG_RECORD_DATUM + + readers_schema = schema.parse("""\ + {"type": "record", "name": "Test", + "fields": [{"name": "E", "type": "int"}, + {"name": "F", "type": "int"}]}""") + datum_to_read = {'E': 5, 'F': 6} + + writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema) + datum_read = read_datum(writer, writers_schema, readers_schema) + print 'Datum Read: %s' % datum_read + self.assertEquals(datum_to_read, datum_read) + + def test_field_order(self): + print_test_name('TEST FIELD ORDER') + writers_schema = LONG_RECORD_SCHEMA + datum_to_write = LONG_RECORD_DATUM + + readers_schema = schema.parse("""\ + {"type": "record", "name": "Test", + "fields": [{"name": "F", "type": "int"}, + {"name": "E", "type": "int"}]}""") + datum_to_read = {'E': 5, 'F': 6} + + writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema) + datum_read = read_datum(writer, writers_schema, readers_schema) + print 'Datum Read: %s' % datum_read + self.assertEquals(datum_to_read, datum_read) + + def test_type_exception(self): + print_test_name('TEST TYPE EXCEPTION') + writers_schema = schema.parse("""\ + {"type": "record", "name": "Test", + "fields": [{"name": "F", "type": "int"}, + {"name": "E", "type": "int"}]}""") + datum_to_write = {'E': 5, 'F': 'Bad'} + self.assertRaises(io.AvroTypeException, write_datum, datum_to_write, writers_schema) + +if __name__ == '__main__': + unittest.main() diff --git a/desktop/core/ext-py/avro-1.5.0/test/test_ipc.py b/desktop/core/ext-py/avro-1.5.0/test/test_ipc.py new file mode 100644 index 0000000..1d1b733 --- /dev/null +++ 
b/desktop/core/ext-py/avro-1.5.0/test/test_ipc.py
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+There are currently no IPC tests within Python, in part because there are no
+servers yet available.
+"""
+import unittest

+# This test imports avro.ipc simply to make sure it at least passes
+# compilation.
+import avro.ipc
+
+class TestIPC(unittest.TestCase):
+  def test_placeholder(self):
+    pass
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/desktop/core/ext-py/avro-1.5.0/test/test_protocol.py b/desktop/core/ext-py/avro-1.5.0/test/test_protocol.py
new file mode 100644
index 0000000..06bda40
--- /dev/null
+++ b/desktop/core/ext-py/avro-1.5.0/test/test_protocol.py
@@ -0,0 +1,422 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test the protocol parsing logic.
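+Each example protocol below is fed to protocol.parse(); examples marked
+valid must parse cleanly, and the invalid ones must raise an error.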
+""" +import unittest +from avro import protocol + +class ExampleProtocol(object): + def __init__(self, protocol_string, valid, name='', comment=''): + self._protocol_string = protocol_string + self._valid = valid + self._name = name or protocol_string # default to schema_string for name + self._comment = comment + + # read-only properties + protocol_string = property(lambda self: self._protocol_string) + valid = property(lambda self: self._valid) + name = property(lambda self: self._name) + + # read/write properties + def set_comment(self, new_comment): self._comment = new_comment + comment = property(lambda self: self._comment, set_comment) + +# +# Example Protocols +# + +EXAMPLES = [ + ExampleProtocol("""\ +{ + "namespace": "com.acme", + "protocol": "HelloWorld", + + "types": [ + {"name": "Greeting", "type": "record", "fields": [ + {"name": "message", "type": "string"}]}, + {"name": "Curse", "type": "error", "fields": [ + {"name": "message", "type": "string"}]} + ], + + "messages": { + "hello": { + "request": [{"name": "greeting", "type": "Greeting" }], + "response": "Greeting", + "errors": ["Curse"] + } + } +} + """, True), + ExampleProtocol("""\ +{"namespace": "org.apache.avro.test", + "protocol": "Simple", + + "types": [ + {"name": "Kind", "type": "enum", "symbols": ["FOO","BAR","BAZ"]}, + + {"name": "MD5", "type": "fixed", "size": 16}, + + {"name": "TestRecord", "type": "record", + "fields": [ + {"name": "name", "type": "string", "order": "ignore"}, + {"name": "kind", "type": "Kind", "order": "descending"}, + {"name": "hash", "type": "MD5"} + ] + }, + + {"name": "TestError", "type": "error", "fields": [ + {"name": "message", "type": "string"} + ] + } + + ], + + "messages": { + + "hello": { + "request": [{"name": "greeting", "type": "string"}], + "response": "string" + }, + + "echo": { + "request": [{"name": "record", "type": "TestRecord"}], + "response": "TestRecord" + }, + + "add": { + "request": [{"name": "arg1", "type": "int"}, {"name": "arg2", "type": "int"}], + "response": "int" + }, + + "echoBytes": { + "request": [{"name": "data", "type": "bytes"}], + "response": "bytes" + }, + + "error": { + "request": [], + "response": "null", + "errors": ["TestError"] + } + } + +} + """, True), + ExampleProtocol("""\ +{"namespace": "org.apache.avro.test.namespace", + "protocol": "TestNamespace", + + "types": [ + {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16}, + {"name": "TestRecord", "type": "record", + "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"} ] + }, + {"name": "TestError", "namespace": "org.apache.avro.test.errors", + "type": "error", "fields": [ {"name": "message", "type": "string"} ] + } + ], + + "messages": { + "echo": { + "request": [{"name": "record", "type": "TestRecord"}], + "response": "TestRecord" + }, + + "error": { + "request": [], + "response": "null", + "errors": ["org.apache.avro.test.errors.TestError"] + } + + } + +} + """, True), +ExampleProtocol("""\ +{"namespace": "org.apache.avro.test.namespace", + "protocol": "TestImplicitNamespace", + + "types": [ + {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16}, + {"name": "ReferencedRecord", "type": "record", + "fields": [ {"name": "foo", "type": "string"} ] }, + {"name": "TestRecord", "type": "record", + "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"}, + {"name": "unqalified", "type": "ReferencedRecord"} ] + }, + {"name": "TestError", + "type": "error", "fields": [ {"name": "message", "type": "string"} ] + } + ], + + "messages": { + 
"echo": { + "request": [{"name": "qualified", + "type": "org.apache.avro.test.namespace.TestRecord"}], + "response": "TestRecord" + }, + + "error": { + "request": [], + "response": "null", + "errors": ["org.apache.avro.test.namespace.TestError"] + } + + } + +} + """, True), +ExampleProtocol("""\ +{"namespace": "org.apache.avro.test.namespace", + "protocol": "TestNamespaceTwo", + + "types": [ + {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16}, + {"name": "ReferencedRecord", "type": "record", + "namespace": "org.apache.avro.other.namespace", + "fields": [ {"name": "foo", "type": "string"} ] }, + {"name": "TestRecord", "type": "record", + "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"}, + {"name": "qualified", + "type": "org.apache.avro.other.namespace.ReferencedRecord"} + ] + }, + {"name": "TestError", + "type": "error", "fields": [ {"name": "message", "type": "string"} ] + } + ], + + "messages": { + "echo": { + "request": [{"name": "qualified", + "type": "org.apache.avro.test.namespace.TestRecord"}], + "response": "TestRecord" + }, + + "error": { + "request": [], + "response": "null", + "errors": ["org.apache.avro.test.namespace.TestError"] + } + + } + +} + """, True), +ExampleProtocol("""\ +{"namespace": "org.apache.avro.test.namespace", + "protocol": "TestValidRepeatedName", + + "types": [ + {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16}, + {"name": "ReferencedRecord", "type": "record", + "namespace": "org.apache.avro.other.namespace", + "fields": [ {"name": "foo", "type": "string"} ] }, + {"name": "ReferencedRecord", "type": "record", + "fields": [ {"name": "bar", "type": "double"} ] }, + {"name": "TestError", + "type": "error", "fields": [ {"name": "message", "type": "string"} ] + } + ], + + "messages": { + "echo": { + "request": [{"name": "qualified", + "type": "ReferencedRecord"}], + "response": "org.apache.avro.other.namespace.ReferencedRecord" + }, + + "error": { + "request": [], + "response": "null", + "errors": ["org.apache.avro.test.namespace.TestError"] + } + + } + +} + """, True), +ExampleProtocol("""\ +{"namespace": "org.apache.avro.test.namespace", + "protocol": "TestInvalidRepeatedName", + + "types": [ + {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16}, + {"name": "ReferencedRecord", "type": "record", + "fields": [ {"name": "foo", "type": "string"} ] }, + {"name": "ReferencedRecord", "type": "record", + "fields": [ {"name": "bar", "type": "double"} ] }, + {"name": "TestError", + "type": "error", "fields": [ {"name": "message", "type": "string"} ] + } + ], + + "messages": { + "echo": { + "request": [{"name": "qualified", + "type": "ReferencedRecord"}], + "response": "org.apache.avro.other.namespace.ReferencedRecord" + }, + + "error": { + "request": [], + "response": "null", + "errors": ["org.apache.avro.test.namespace.TestError"] + } + + } + +} + """, False), + ExampleProtocol("""\ +{"namespace": "org.apache.avro.test", + "protocol": "BulkData", + + "types": [], + + "messages": { + + "read": { + "request": [], + "response": "bytes" + }, + + "write": { + "request": [ {"name": "data", "type": "bytes"} ], + "response": "null" + } + + } + +} + """, True), + ExampleProtocol("""\ +{ + "protocol" : "API", + "namespace" : "xyz.api", + "types" : [ { + "type" : "enum", + "name" : "Symbology", + "namespace" : "xyz.api.product", + "symbols" : [ "OPRA", "CUSIP", "ISIN", "SEDOL" ] + }, { + "type" : "record", + "name" : "Symbol", + "namespace" : "xyz.api.product", + "fields" : [ { + "name" : 
"symbology", + "type" : "xyz.api.product.Symbology" + }, { + "name" : "symbol", + "type" : "string" + } ] + }, { + "type" : "record", + "name" : "MultiSymbol", + "namespace" : "xyz.api.product", + "fields" : [ { + "name" : "symbols", + "type" : { + "type" : "map", + "values" : "xyz.api.product.Symbol" + } + } ] + } ], + "messages" : { + } +} + """, True), +] + +VALID_EXAMPLES = [e for e in EXAMPLES if e.valid] + +class TestProtocol(unittest.TestCase): + def test_parse(self): + num_correct = 0 + for example in EXAMPLES: + try: + protocol.parse(example.protocol_string) + if example.valid: + num_correct += 1 + else: + self.fail("Parsed invalid protocol: %s" % (example.name,)) + except Exception, e: + if not example.valid: + num_correct += 1 + else: + self.fail("Coudl not parse valid protocol: %s" % (example.name,)) + + fail_msg = "Parse behavior correct on %d out of %d protocols." % \ + (num_correct, len(EXAMPLES)) + self.assertEqual(num_correct, len(EXAMPLES), fail_msg) + + def test_valid_cast_to_string_after_parse(self): + """ + Test that the string generated by an Avro Protocol object + is, in fact, a valid Avro protocol. + """ + print '' + print 'TEST CAST TO STRING' + print '===================' + print '' + + num_correct = 0 + for example in VALID_EXAMPLES: + protocol_data = protocol.parse(example.protocol_string) + try: + try: + protocol.parse(str(protocol_data)) + debug_msg = "%s: STRING CAST SUCCESS" % example.name + num_correct += 1 + except: + debug_msg = "%s: STRING CAST FAILURE" % example.name + finally: + print debug_msg + + fail_msg = "Cast to string success on %d out of %d protocols" % \ + (num_correct, len(VALID_EXAMPLES)) + self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg) + + def test_equivalence_after_round_trip(self): + """ + 1. Given a string, parse it to get Avro protocol "original". + 2. Serialize "original" to a string and parse that string + to generate Avro protocol "round trip". + 3. Ensure "original" and "round trip" protocols are equivalent. + """ + print '' + print 'TEST ROUND TRIP' + print '===============' + print '' + + num_correct = 0 + for example in VALID_EXAMPLES: + original_protocol = protocol.parse(example.protocol_string) + round_trip_protocol = protocol.parse(str(original_protocol)) + + if original_protocol == round_trip_protocol: + num_correct += 1 + debug_msg = "%s: ROUND TRIP SUCCESS" % example.name + else: + self.fail("Round trip failure: %s %s %s", (example.name, example.protocol_string, str(original_protocol))) + + fail_msg = "Round trip success on %d out of %d protocols" % \ + (num_correct, len(VALID_EXAMPLES)) + self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg) + +if __name__ == '__main__': + unittest.main() diff --git a/desktop/core/ext-py/avro-1.5.0/test/test_schema.py b/desktop/core/ext-py/avro-1.5.0/test/test_schema.py new file mode 100644 index 0000000..4700144 --- /dev/null +++ b/desktop/core/ext-py/avro-1.5.0/test/test_schema.py @@ -0,0 +1,394 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Test the schema parsing logic. +""" +import unittest +from avro import schema + +def print_test_name(test_name): + print '' + print test_name + print '=' * len(test_name) + print '' + +class ExampleSchema(object): + def __init__(self, schema_string, valid, name='', comment=''): + self._schema_string = schema_string + self._valid = valid + self._name = name or schema_string # default to schema_string for name + self.comment = comment + + @property + def schema_string(self): + return self._schema_string + + @property + def valid(self): + return self._valid + + @property + def name(self): + return self._name + +# +# Example Schemas +# + +def make_primitive_examples(): + examples = [] + for type in schema.PRIMITIVE_TYPES: + examples.append(ExampleSchema('"%s"' % type, True)) + examples.append(ExampleSchema('{"type": "%s"}' % type, True)) + return examples + +PRIMITIVE_EXAMPLES = [ + ExampleSchema('"True"', False), + ExampleSchema('True', False), + ExampleSchema('{"no_type": "test"}', False), + ExampleSchema('{"type": "panther"}', False), +] + make_primitive_examples() + +FIXED_EXAMPLES = [ + ExampleSchema('{"type": "fixed", "name": "Test", "size": 1}', True), + ExampleSchema("""\ + {"type": "fixed", + "name": "MyFixed", + "namespace": "org.apache.hadoop.avro", + "size": 1} + """, True), + ExampleSchema("""\ + {"type": "fixed", + "name": "Missing size"} + """, False), + ExampleSchema("""\ + {"type": "fixed", + "size": 314} + """, False), +] + +ENUM_EXAMPLES = [ + ExampleSchema('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', True), + ExampleSchema("""\ + {"type": "enum", + "name": "Status", + "symbols": "Normal Caution Critical"} + """, False), + ExampleSchema("""\ + {"type": "enum", + "name": [ 0, 1, 1, 2, 3, 5, 8 ], + "symbols": ["Golden", "Mean"]} + """, False), + ExampleSchema("""\ + {"type": "enum", + "symbols" : ["I", "will", "fail", "no", "name"]} + """, False), + ExampleSchema("""\ + {"type": "enum", + "name": "Test" + "symbols" : ["AA", "AA"]} + """, False), +] + +ARRAY_EXAMPLES = [ + ExampleSchema('{"type": "array", "items": "long"}', True), + ExampleSchema("""\ + {"type": "array", + "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}} + """, True), +] + +MAP_EXAMPLES = [ + ExampleSchema('{"type": "map", "values": "long"}', True), + ExampleSchema("""\ + {"type": "map", + "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}} + """, True), +] + +UNION_EXAMPLES = [ + ExampleSchema('["string", "null", "long"]', True), + ExampleSchema('["null", "null"]', False), + ExampleSchema('["long", "long"]', False), + ExampleSchema("""\ + [{"type": "array", "items": "long"} + {"type": "array", "items": "string"}] + """, False), +] + +RECORD_EXAMPLES = [ + ExampleSchema("""\ + {"type": "record", + "name": "Test", + "fields": [{"name": "f", + "type": "long"}]} + """, True), + ExampleSchema("""\ + {"type": "error", + "name": "Test", + "fields": [{"name": "f", + "type": "long"}]} + """, True), + ExampleSchema("""\ + {"type": "record", + "name": "Node", + "fields": [{"name": "label", "type": "string"}, + {"name": "children", + "type": {"type": "array", 
"items": "Node"}}]} + """, True), + ExampleSchema("""\ + {"type": "record", + "name": "Lisp", + "fields": [{"name": "value", + "type": ["null", "string", + {"type": "record", + "name": "Cons", + "fields": [{"name": "car", "type": "Lisp"}, + {"name": "cdr", "type": "Lisp"}]}]}]} + """, True), + ExampleSchema("""\ + {"type": "record", + "name": "HandshakeRequest", + "namespace": "org.apache.avro.ipc", + "fields": [{"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]}]} + """, True), + ExampleSchema("""\ + {"type": "record", + "name": "HandshakeResponse", + "namespace": "org.apache.avro.ipc", + "fields": [{"name": "match", + "type": {"type": "enum", + "name": "HandshakeMatch", + "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", "type": ["null", "string"]}, + {"name": "serverHash", + "type": ["null", + {"name": "MD5", "size": 16, "type": "fixed"}]}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]}]} + """, True), + ExampleSchema("""\ + {"type": "record", + "name": "Interop", + "namespace": "org.apache.avro", + "fields": [{"name": "intField", "type": "int"}, + {"name": "longField", "type": "long"}, + {"name": "stringField", "type": "string"}, + {"name": "boolField", "type": "boolean"}, + {"name": "floatField", "type": "float"}, + {"name": "doubleField", "type": "double"}, + {"name": "bytesField", "type": "bytes"}, + {"name": "nullField", "type": "null"}, + {"name": "arrayField", + "type": {"type": "array", "items": "double"}}, + {"name": "mapField", + "type": {"type": "map", + "values": {"name": "Foo", + "type": "record", + "fields": [{"name": "label", + "type": "string"}]}}}, + {"name": "unionField", + "type": ["boolean", + "double", + {"type": "array", "items": "bytes"}]}, + {"name": "enumField", + "type": {"type": "enum", + "name": "Kind", + "symbols": ["A", "B", "C"]}}, + {"name": "fixedField", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "recordField", + "type": {"type": "record", + "name": "Node", + "fields": [{"name": "label", "type": "string"}, + {"name": "children", + "type": {"type": "array", + "items": "Node"}}]}}]} + """, True), + ExampleSchema("""\ + {"type": "record", + "name": "ipAddr", + "fields": [{"name": "addr", + "type": [{"name": "IPv6", "type": "fixed", "size": 16}, + {"name": "IPv4", "type": "fixed", "size": 4}]}]} + """, True), + ExampleSchema("""\ + {"type": "record", + "name": "Address", + "fields": [{"type": "string"}, + {"type": "string", "name": "City"}]} + """, False), + ExampleSchema("""\ + {"type": "record", + "name": "Event", + "fields": [{"name": "Sponsor"}, + {"name": "City", "type": "string"}]} + """, False), + ExampleSchema("""\ + {"type": "record", + "fields": "His vision, from the constantly passing bars," + "name", "Rainer"} + """, False), + ExampleSchema("""\ + {"name": ["Tom", "Jerry"], + "type": "record", + "fields": [{"name": "name", "type": "string"}]} + """, False), +] + +EXAMPLES = PRIMITIVE_EXAMPLES +EXAMPLES += FIXED_EXAMPLES +EXAMPLES += ENUM_EXAMPLES +EXAMPLES += ARRAY_EXAMPLES +EXAMPLES += MAP_EXAMPLES +EXAMPLES += UNION_EXAMPLES +EXAMPLES += RECORD_EXAMPLES + +VALID_EXAMPLES = [e for e in EXAMPLES if e.valid] + +# TODO(hammer): refactor into harness for examples +# TODO(hammer): pretty-print detailed output +# TODO(hammer): make verbose flag +# TODO(hammer): show strack trace to 
user
+# TODO(hammer): use logging module?
+class TestSchema(unittest.TestCase):
+
+  def test_correct_recursive_extraction(self):
+    s = schema.parse('{"type": "record", "name": "X", "fields": [{"name": "y", "type": {"type": "record", "name": "Y", "fields": [{"name": "Z", "type": "X"}]}}]}')
+    t = schema.parse(str(s.fields[0].type))
+    # If we've made it this far, the subschema was reasonably stringified; it could be reparsed.
+    self.assertEqual("X", t.fields[0].type.name)
+
+  def test_parse(self):
+    correct = 0
+    for example in EXAMPLES:
+      try:
+        schema.parse(example.schema_string)
+        if example.valid:
+          correct += 1
+        else:
+          self.fail("Invalid schema was parsed: " + example.schema_string)
+      except:
+        if not example.valid:
+          correct += 1
+        else:
+          self.fail("Valid schema failed to parse: " + example.schema_string)
+
+    fail_msg = "Parse behavior correct on %d out of %d schemas." % \
+      (correct, len(EXAMPLES))
+    self.assertEqual(correct, len(EXAMPLES), fail_msg)
+
+  def test_valid_cast_to_string_after_parse(self):
+    """
+    Test that the string generated by an Avro Schema object
+    is, in fact, a valid Avro schema.
+    """
+    print_test_name('TEST CAST TO STRING AFTER PARSE')
+    correct = 0
+    for example in VALID_EXAMPLES:
+      schema_data = schema.parse(example.schema_string)
+      schema.parse(str(schema_data))
+      correct += 1
+
+    fail_msg = "Cast to string success on %d out of %d schemas" % \
+      (correct, len(VALID_EXAMPLES))
+    self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
+
+  def test_equivalence_after_round_trip(self):
+    """
+    1. Given a string, parse it to get Avro schema "original".
+    2. Serialize "original" to a string and parse that string
+       to generate Avro schema "round trip".
+    3. Ensure "original" and "round trip" schemas are equivalent.
+    """
+    print_test_name('TEST ROUND TRIP')
+    correct = 0
+    for example in VALID_EXAMPLES:
+      original_schema = schema.parse(example.schema_string)
+      round_trip_schema = schema.parse(str(original_schema))
+      if original_schema == round_trip_schema:
+        correct += 1
+        debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
+      else:
+        debug_msg = "%s: ROUND TRIP FAILURE" % example.name
+        self.fail("Round trip failure: %s, %s, %s" % (example.name, original_schema, round_trip_schema))
+
+    fail_msg = "Round trip success on %d out of %d schemas" % \
+      (correct, len(VALID_EXAMPLES))
+    self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
+
+  # TODO(hammer): more tests
+  def test_fullname(self):
+    """
+    The fullname is determined in one of the following ways:
+     * A name and namespace are both specified. For example,
+       one might use "name": "X", "namespace": "org.foo"
+       to indicate the fullname "org.foo.X".
+     * A fullname is specified. If the name specified contains
+       a dot, then it is assumed to be a fullname, and any
+       namespace also specified is ignored. For example,
+       use "name": "org.foo.X" to indicate the
+       fullname "org.foo.X".
+     * A name only is specified, i.e., a name that contains no
+       dots. In this case the namespace is taken from the most
+       tightly enclosing schema or protocol. For example,
+       if "name": "X" is specified, and this occurs
+       within a field of the record definition
+       of "org.foo.Y", then the fullname is "org.foo.X".
+
+    References to previously defined names are as in the latter
+    two cases above: if they contain a dot they are a fullname, if
+    they do not contain a dot, the namespace is the namespace of
+    the enclosing definition.
+
+    Primitive type names have no namespace and their names may
+    not be defined in any namespace.
A schema may only contain + multiple definitions of a fullname if the definitions are + equivalent. + """ + print_test_name('TEST FULLNAME') + + # name and namespace specified + fullname = schema.Name('a', 'o.a.h', None).fullname + self.assertEqual(fullname, 'o.a.h.a') + + # fullname and namespace specified + fullname = schema.Name('a.b.c.d', 'o.a.h', None).fullname + self.assertEqual(fullname, 'a.b.c.d') + + # name and default namespace specified + fullname = schema.Name('a', None, 'b.c.d').fullname + self.assertEqual(fullname, 'b.c.d.a') + + # fullname and default namespace specified + fullname = schema.Name('a.b.c.d', None, 'o.a.h').fullname + self.assertEqual(fullname, 'a.b.c.d') + + # fullname, namespace, default namespace specified + fullname = schema.Name('a.b.c.d', 'o.a.a', 'o.a.h').fullname + self.assertEqual(fullname, 'a.b.c.d') + + # name, namespace, default namespace specified + fullname = schema.Name('a', 'o.a.a', 'o.a.h').fullname + self.assertEqual(fullname, 'o.a.a.a') + +if __name__ == '__main__': + unittest.main() -- 1.7.4.4