mammoviews/combine_predictions_hdr_and_img.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Applications/anaconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
      "  return f(*args, **kwds)\n",
      "/Applications/anaconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
      "  return f(*args, **kwds)\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tabledir = \"../tables/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(772423, 1)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fn = f\"{tabledir}/2017-06-mammo_tables/df_dcm_reports_birads_path_indic_dens_birad_wi_year_noreport_nodupl.csv.gz\"\n",
    "df_bt = pd.read_csv(fn, usecols=[\"id\", \"BT_case\"])\n",
    "df_bt.set_index(\"id\", inplace=True)\n",
    "df_bt = ~df_bt.isnull()\n",
    "df_bt.columns = [\"digital\"]\n",
    "df_bt.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>set</th>\n",
       "      <th>label</th>\n",
       "      <th>view</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1013372709_1.2.840.113654.2.70.1.175625299786291545159233542096043464711_3_1</th>\n",
       "      <td>test</td>\n",
       "      <td>normal</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1028995243_1.2.840.113654.2.70.1.56947963181878834591544466761404805157_45576_2</th>\n",
       "      <td>test</td>\n",
       "      <td>normal</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1105112884_1.2.840.113654.2.70.1.178729598744204462442695104630823323474_8905_2</th>\n",
       "      <td>test</td>\n",
       "      <td>normal</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1185125156_1.2.840.113654.2.70.1.45840593750642722243371816041014016032_2_4</th>\n",
       "      <td>test</td>\n",
       "      <td>normal</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1496452586_1.2.840.113654.2.70.1.5582568668770891599992528318631583880_1351_4</th>\n",
       "      <td>test</td>\n",
       "      <td>normal</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                     set   label view\n",
       "id                                                                   \n",
       "1013372709_1.2.840.113654.2.70.1.17562529978629...  test  normal    N\n",
       "1028995243_1.2.840.113654.2.70.1.56947963181878...  test  normal    N\n",
       "1105112884_1.2.840.113654.2.70.1.17872959874420...  test  normal    N\n",
       "1185125156_1.2.840.113654.2.70.1.45840593750642...  test  normal    N\n",
       "1496452586_1.2.840.113654.2.70.1.55825686687708...  test  normal    N"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "infile = f\"{tabledir}/spotmag_predictions/train_test_split-2018-02-16-within7e5-label.csv\"\n",
    "dflab = pd.read_csv(infile, index_col='id')\n",
    "dflab[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read header-based predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(772367, 1)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "infile = f\"{tabledir}/spotmag_predictions/all_predictions_glmnet.tab\"\n",
    "dfpred_glmnet = pd.read_table(infile, index_col=0)\n",
    "dfpred_glmnet.columns = [cc.replace(\"predictions\", \"score\") for cc in dfpred_glmnet.columns]\n",
    "dfpred_glmnet.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(772367, 5)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>score_gbm</th>\n",
       "      <th>score_xgb</th>\n",
       "      <th>score_rpart</th>\n",
       "      <th>score_xgbt</th>\n",
       "      <th>ViewModifierCodeMeaning</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2454166001_1.2.840.113654.2.70.1.269947926355209368181920716215505958953_149405_2104556</th>\n",
       "      <td>0.009005</td>\n",
       "      <td>0.020207</td>\n",
       "      <td>0.006882</td>\n",
       "      <td>0.059474</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2454166001_1.2.840.113654.2.70.1.269947926355209368181920716215505958953_149405_2104557</th>\n",
       "      <td>0.013337</td>\n",
       "      <td>0.016762</td>\n",
       "      <td>0.006882</td>\n",
       "      <td>0.059660</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2454166001_1.2.840.113654.2.70.1.269947926355209368181920716215505958953_149484_2141538</th>\n",
       "      <td>0.013337</td>\n",
       "      <td>0.016762</td>\n",
       "      <td>0.006882</td>\n",
       "      <td>0.061051</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2454166001_1.2.840.113654.2.70.1.269947926355209368181920716215505958953_149484_2141537</th>\n",
       "      <td>0.013337</td>\n",
       "      <td>0.016762</td>\n",
       "      <td>0.006882</td>\n",
       "      <td>0.061051</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3337971863_1.2.840.113654.2.70.1.337982194343327746313656933304494759333_1_1</th>\n",
       "      <td>0.031560</td>\n",
       "      <td>0.059142</td>\n",
       "      <td>0.006882</td>\n",
       "      <td>0.157488</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    score_gbm  score_xgb  \\\n",
       "id                                                                         \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...   0.009005   0.020207   \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...   0.013337   0.016762   \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...   0.013337   0.016762   \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...   0.013337   0.016762   \n",
       "3337971863_1.2.840.113654.2.70.1.33798219434332...   0.031560   0.059142   \n",
       "\n",
       "                                                    score_rpart  score_xgbt  \\\n",
       "id                                                                            \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...     0.006882    0.059474   \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...     0.006882    0.059660   \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...     0.006882    0.061051   \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...     0.006882    0.061051   \n",
       "3337971863_1.2.840.113654.2.70.1.33798219434332...     0.006882    0.157488   \n",
       "\n",
       "                                                   ViewModifierCodeMeaning  \n",
       "id                                                                          \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...                     NaN  \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...                     NaN  \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...                     NaN  \n",
       "2454166001_1.2.840.113654.2.70.1.26994792635520...                     NaN  \n",
       "3337971863_1.2.840.113654.2.70.1.33798219434332...                     NaN  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "infile = f\"{tabledir}/spotmag_predictions/all_predictions_allmodels_trained_on_train.tab\"\n",
    "dfpred = pd.read_table(infile, index_col=0)\n",
    "dfpred.columns = [cc.replace(\"predictions\", \"score\") for cc in dfpred.columns]\n",
    "dfpred.index.name = 'id'\n",
    "print(dfpred.shape)\n",
    "dfpred[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(772367, 8)\n"
     ]
    }
   ],
   "source": [
    "if 'set' not in dfpred.columns:\n",
    "    dfpred = dfpred.merge(dflab,  left_index=True, right_index=True, how='left')\n",
    "    print(dfpred.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "normal          3526\n",
       "magn/spot        572\n",
       "wire loc          57\n",
       "stereotactic      25\n",
       "other              9\n",
       "Name: view, dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "colmap = {\"N\":\"normal\", \"M\": \"magn/spot\",\n",
    "          \"T\":\"stereotactic\", \"W\":\"wire loc\", \"X\":\"other\"}\n",
    "view_counts = dfpred[~dfpred.view.isnull()].view.map(lambda x: colmap[x]).value_counts()\n",
    "view_counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>set</th>\n",
       "      <th>train</th>\n",
       "      <th>test</th>\n",
       "      <th>val</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>view</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>magn/spot</th>\n",
       "      <td>380</td>\n",
       "      <td>96</td>\n",
       "      <td>96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>normal</th>\n",
       "      <td>2310</td>\n",
       "      <td>612</td>\n",
       "      <td>604</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>other</th>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>stereotactic</th>\n",
       "      <td>17</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>wire loc</th>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "set           train  test  val\n",
       "view                          \n",
       "magn/spot       380    96   96\n",
       "normal         2310   612  604\n",
       "other             4     3    2\n",
       "stereotactic     17     4    4\n",
       "wire loc         37    11    9"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.crosstab(dfpred[~dfpred.view.isnull()].view.map(lambda x: colmap[x]), dfpred.set)[[\"train\", \"test\", \"val\"]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read image-based predictions (general)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "../tables//spotmag_predictions/predictions_images_4189-epoch55-e5ce2d69b035975cb5336cec0da9a32a.csv\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['score_image', 'score_image_max'], dtype='object')"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tag = \"e5ce2d69b035975cb5336cec0da9a32a\"\n",
    "epoch = 55\n",
    "infile = f\"{tabledir}/spotmag_predictions/predictions_images_4189-epoch{epoch}-{tag}.csv\"\n",
    "# infile = f\"{tabledir}/spotmag_predictions/df_dcm_reports_birads_path_indic_dens_birad_wi_year_noreport_nodupl-spotmag_img_prediction-{tag}.csv\"\n",
    "print(infile)\n",
    "dfpred_img = pd.read_csv(infile, index_col=0)\n",
    "dfpred_img = dfpred_img[['score_image', 'score_image_max']]\n",
    "dfpred_img = dfpred_img.groupby(level=0).mean()\n",
    "dfpred_img.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read image-based predictions (wire localization)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "infile = f\"{tabledir}/spotmag_predictions/predictions_wire_combined_e8e71fc090141d7c6fb334359152d295.csv\"\n",
    "\n",
    "dfpred_imgwire = pd.read_csv(infile, index_col=0)\n",
    "dfpred_imgwire[\"score_wire_max\"] = 1-dfpred_imgwire[[\"scores_0_or\",\"scores_0_fl\"]].min(1)\n",
    "dfpred_imgwire = dfpred_imgwire.drop([\"scores_0_or\",\"scores_0_fl\", \"label\"], axis=1)\n",
    "dfpred_imgwire.columns = [cc.replace(\"scores\", \"score_wire\") for cc in dfpred_imgwire.columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(772367, 13)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "if 'score_image' not in dfpred.columns:\n",
    "    dfpred = pd.concat([dfpred, dfpred_img], axis=1)\n",
    "    dfpred.index.name = 'id'\n",
    "    del dfpred_img\n",
    "    \n",
    "if 'score_glmnet' not in dfpred.columns:\n",
    "    dfpred = pd.concat([dfpred, dfpred_glmnet], axis=1)\n",
    "    dfpred.index.name = 'id'\n",
    "    del dfpred_glmnet\n",
    "    \n",
    "if 'score_wire' not in dfpred.columns:\n",
    "    dfpred = pd.concat([dfpred, dfpred_imgwire], axis=1)\n",
    "    dfpred.index.name = 'id'\n",
    "    del dfpred_imgwire\n",
    "\n",
    "dfpred.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "if 'label' not in dfpred.columns:\n",
    "    dfpred = pd.concat([dfpred, dflab], axis=1)\n",
    "if 'digital' not in dfpred.columns:\n",
    "    dfpred = pd.concat([dfpred, df_bt], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>score_image</th>\n",
       "      <th>False</th>\n",
       "      <th>True</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>score_wire</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>False</th>\n",
       "      <td>3584</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>True</th>\n",
       "      <td>605</td>\n",
       "      <td>768234</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "score_image  False   True \n",
       "score_wire                \n",
       "False         3584       0\n",
       "True           605  768234"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.crosstab(dfpred[\"score_wire\"].isnull(), dfpred[\"score_image\"].isnull())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dfpred.rename(columns={\"score_xgbt\":\"score_gbmt\"}, inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Add ensembled (max, avg) scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dfpred['score_wire'] = dfpred['score_wire'].fillna(0)\n",
    "dfpred['score_wire_max'] = dfpred['score_wire_max'].fillna(0)\n",
    "dfpred['score_image+glmnet'] = (dfpred['score_image'] + dfpred['score_glmnet'])/2\n",
    "dfpred['score_image+gbmt'] = (dfpred['score_image'] + dfpred['score_gbmt'])/2\n",
    "\n",
    "dfpred['score_max(image;gbmt)'] = dfpred[['score_image','score_gbmt']].max(1)\n",
    "\n",
    "dfpred['score_image*glmnet'] = np.sqrt(dfpred['score_image'] * dfpred['score_glmnet'])\n",
    "dfpred['score_image*gbmt'] = np.sqrt(dfpred['score_image'] * dfpred['score_gbmt'])\n",
    "dfpred['score_max_image_wire'] = np.nanmax(dfpred[['score_image','score_wire']].values, axis=1)\n",
    "dfpred['score_max_image_wire_max'] = np.nanmax(dfpred[['score_image','score_wire_max']].values, axis=1)\n",
    "# dfpred['score_wire'].isnull()\n",
    "dfpred['score_max_image_wire+gbmt'] =(dfpred['score_max_image_wire'] + dfpred['score_gbmt'])/2\n",
    "\n",
    "dfpred['score_max_image_wire_max+gbmt'] =(dfpred['score_max_image_wire_max'] + dfpred['score_gbmt'])/2\n",
    "\n",
    "dfpred['score_max(image;wire_max;gbmt)'] = dfpred[['score_wire_max','score_gbmt', 'score_image']].max(1)\n",
    "\n",
    "dfpred['score_max_wire_image+gbmt'] = np.nanmax(dfpred[['score_image+gbmt','score_wire']].values, axis=1)\n",
    "\n",
    "dfpred['score_max_wire_max_image+gbmt'] = np.nanmax(dfpred[['score_image+gbmt','score_wire_max']].values, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dfpred.rename(columns={\"ViewModifierCodeMeaning\":\"ViewModifier\"}, inplace=True)\n",
    "dfpred.index.name = 'id'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save the combined table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "772423"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(dfpred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dfpred.to_csv(f'{tabledir}/all_predictions_with_images-{tag}.tab', sep='\\t')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}