Data Selection
Contents
# Install the necessary dependencies
import os
import sys
import numpy as np
import pandas as pd
!{sys.executable} -m pip install --quiet jupyterlab_myst ipython
5.4.2. Data Selection#
5.4.2.1. Overview#
In this section, we’ll focus on how to slice, dice, and generally get and set subsets of Pandas objects.
5.4.2.2. Selection by label#
Whether a copy or a reference is returned for a setting operation may depend on the context. This is sometimes called chained assignment and should be avoided.
`.loc` is strict when you present slicers that are not compatible (or convertible) with the index type, for example when using integers with a `DatetimeIndex`. These will raise a `TypeError`.
dfl = pd.DataFrame(np.random.randn(5, 4),
columns=list('ABCD'),
index=pd.date_range('20130101', periods=5))
dfl.loc[2:3]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[3], line 1
----> 1 dfl.loc[2:3]
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key)
1070 axis = self.axis or 0
1072 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073 return self._getitem_axis(maybe_callable, axis=axis)
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1290, in _LocIndexer._getitem_axis(self, key, axis)
1288 if isinstance(key, slice):
1289 self._validate_key(key, axis)
-> 1290 return self._get_slice_axis(key, axis=axis)
1291 elif com.is_bool_indexer(key):
1292 return self._getbool_axis(key, axis=axis)
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1324, in _LocIndexer._get_slice_axis(self, slice_obj, axis)
1321 return obj.copy(deep=False)
1323 labels = obj._get_axis(axis)
-> 1324 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)
1326 if isinstance(indexer, slice):
1327 return self.obj._slice(indexer, axis=axis)
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/datetimes.py:809, in DatetimeIndex.slice_indexer(self, start, end, step, kind)
801 # GH#33146 if start and end are combinations of str and None and Index is not
802 # monotonic, we can not use Index.slice_indexer because it does not honor the
803 # actual elements, is only searching for start and end
804 if (
805 check_str_or_none(start)
806 or check_str_or_none(end)
807 or self.is_monotonic_increasing
808 ):
--> 809 return Index.slice_indexer(self, start, end, step, kind=kind)
811 mask = np.array(True)
812 deprecation_mask = np.array(True)
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/base.py:6602, in Index.slice_indexer(self, start, end, step, kind)
6559 """
6560 Compute the slice indexer for input labels and step.
6561
(...)
6598 slice(1, 3, None)
6599 """
6600 self._deprecated_arg(kind, "kind", "slice_indexer")
-> 6602 start_slice, end_slice = self.slice_locs(start, end, step=step)
6604 # return a slice
6605 if not is_scalar(start_slice):
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/base.py:6810, in Index.slice_locs(self, start, end, step, kind)
6808 start_slice = None
6809 if start is not None:
-> 6810 start_slice = self.get_slice_bound(start, "left")
6811 if start_slice is None:
6812 start_slice = 0
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/base.py:6719, in Index.get_slice_bound(self, label, side, kind)
6715 original_label = label
6717 # For datetime indices label may be a string that has to be converted
6718 # to datetime boundary according to its resolution.
-> 6719 label = self._maybe_cast_slice_bound(label, side)
6721 # we need to look up the label
6722 try:
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/datetimes.py:767, in DatetimeIndex._maybe_cast_slice_bound(self, label, side, kind)
762 if isinstance(label, date) and not isinstance(label, datetime):
763 # Pandas supports slicing with dates, treated as datetimes at midnight.
764 # https://github.com/pandas-dev/pandas/issues/31501
765 label = Timestamp(label).to_pydatetime()
--> 767 label = super()._maybe_cast_slice_bound(label, side, kind=kind)
768 self._deprecate_mismatched_indexing(label)
769 return self._maybe_cast_for_get_loc(label)
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexes/datetimelike.py:320, in DatetimeIndexOpsMixin._maybe_cast_slice_bound(self, label, side, kind)
318 return lower if side == "left" else upper
319 elif not isinstance(label, self._data._recognized_scalars):
--> 320 raise self._invalid_indexer("slice", label)
322 return label
TypeError: cannot do slice indexing on DatetimeIndex with these indexers [2] of type int
String-like values used in slicing can be converted to the type of the index and lead to natural slicing.
dfl.loc['20130102':'20130104']
A | B | C | D | |
---|---|---|---|---|
2013-01-02 | 0.588777 | 1.250445 | -0.276583 | -0.121396 |
2013-01-03 | 0.967615 | -1.820374 | 0.565907 | -0.893642 |
2013-01-04 | 0.732792 | 0.428791 | -1.568156 | -0.182880 |
from IPython.display import HTML
display(
HTML(
"""
<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:730px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%20%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dpd.date_range%28'20130101',%20periods%3D5%29%29%0Adfl%0Adfl.loc%5B'20130102'%3A'20130104'%5D&d=2023-07-13&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
Pandas will raise a `KeyError` when indexing with a list that contains missing labels.
Pandas provides a suite of methods in order to have purely label-based indexing. This is a strict inclusion-based protocol. Every label asked for must be in the index, or a `KeyError` will be raised. When slicing, both the start bound AND the stop bound are included, if present in the index. Integers are valid labels, but they refer to the label and not the position.
The `.loc` attribute is the primary access method. The following are valid inputs:
- A single label, e.g. `5` or `'a'` (note that `5` is interpreted as a label of the index; this use is not an integer position along the index).
- A list or array of labels, e.g. `['a', 'b', 'c']`.
- A slice object with labels, e.g. `'a':'f'` (note that, contrary to usual Python slices, both the start and the stop are included, when present in the index!).
- A boolean array.
- A `callable`.
s1 = pd.Series(np.random.randn(6), index=list('abcdef'))
s1
s1.loc['c':]
c 1.519834
d -0.629201
e -0.726953
f 1.290481
dtype: float64
from IPython.display import HTML
display(
HTML(
"""
<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:730px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%286%29,%20index%3Dlist%28'abcdef'%29%29%0As1.loc%5B'c'%3A%5D%0A&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
s1.loc['b']
0.3983111784912009
from IPython.display import HTML
display(
HTML(
"""
<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:630px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%286%29,%20index%3Dlist%28'abcdef'%29%29%0As1.loc%5B'b'%5D%0A&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
Note that setting works as well:
s1.loc['c':] = 0
s1
a 0.229543
b 0.398311
c 0.000000
d 0.000000
e 0.000000
f 0.000000
dtype: float64
from IPython.display import HTML
display(
HTML(
"""
<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:630px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%286%29,%20index%3Dlist%28'abcdef'%29%29%0As1.loc%5B'c'%3A%5D%20%3D%200%0As1&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
df1 = pd.DataFrame(np.random.randn(6, 4),
index=list('abcdef'),
columns=list('ABCD'))
df1
df1.loc[['a', 'b', 'd'], :]
A | B | C | D | |
---|---|---|---|---|
a | 1.582003 | 0.987351 | -0.522115 | 0.683331 |
b | -0.020445 | -1.267777 | -0.030918 | 1.174362 |
d | 0.242241 | 0.194438 | -0.405266 | 0.569506 |
from IPython.display import HTML
display(
HTML(
"""
<link rel="stylesheet" href="https://ocademy-ai.github.io/machine-learning/_static/style.css">
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:680px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B%5B'a',%20'b',%20'd'%5D,%20%3A%5D%0A&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
Accessing via label slices:
df1.loc['d':, 'A':'C']
A | B | C | |
---|---|---|---|
d | 0.242241 | 0.194438 | -0.405266 |
e | 0.601422 | 1.325717 | 1.402254 |
f | 0.788776 | 1.725383 | 0.271514 |
For getting a cross-section using a label (equivalent to `df.xs('a')`):
df1.loc['a']
A 1.582003
B 0.987351
C -0.522115
D 0.683331
Name: a, dtype: float64
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:690px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B'a'%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
For getting values with a boolean array:
df1.loc['a'] > 0
A True
B True
C False
D True
Name: a, dtype: bool
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:650px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B'a'%5D%20%3E%200&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
df1.loc[:, df1.loc['a'] > 0]
A | B | D | |
---|---|---|---|
a | 1.582003 | 0.987351 | 0.683331 |
b | -0.020445 | -1.267777 | 1.174362 |
c | 0.945725 | -0.525497 | -0.085318 |
d | 0.242241 | 0.194438 | 0.569506 |
e | 0.601422 | 1.325717 | -0.200271 |
f | 0.788776 | 1.725383 | -0.974910 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:700px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B%3A,%20df1.loc%5B'a'%5D%20%3E%200%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
NA values in a boolean array propagate as `False`:
mask = pd.array([True, False, True, False, pd.NA, False], dtype="boolean")
mask
<BooleanArray>
[True, False, True, False, <NA>, False]
Length: 6, dtype: boolean
df1[mask]
A | B | C | D | |
---|---|---|---|---|
a | 1.582003 | 0.987351 | -0.522115 | 0.683331 |
c | 0.945725 | -0.525497 | -0.891158 | -0.085318 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:680px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Amask%20%3D%20pd.array%28%5BTrue,%20False,%20True,%20False,%20pd.NA,%20False%5D,%20dtype%3D%22boolean%22%29%0Adf1%5Bmask%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
For getting a value explicitly:
df1.loc['a', 'A'] # this is also equivalent to ``df1.at['a','A']``
1.5820025794487522
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:680px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5B'a',%20'A'%5D%20&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
5.4.2.3. Slicing with labels#
When using `.loc` with slices, if both the start and the stop labels are present in the index, then the elements located between the two (including them) are returned:
s = pd.Series(list('abcde'), index=[0, 3, 2, 5, 4])
s.loc[3:5]
3 b
2 c
5 d
dtype: object
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:580px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcde'%29,%20index%3D%5B0,%203,%202,%205,%204%5D%29%0As.loc%5B3%3A5%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
If at least one of the two is absent, but the index is sorted, and can be compared against start and stop labels, then slicing will still work as expected, by selecting labels which rank between the two:
s.sort_index()
0 a
2 c
3 b
4 e
5 d
dtype: object
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:600px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcde'%29,%20index%3D%5B0,%203,%202,%205,%204%5D%29%0As.sort_index%28%29&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
s.sort_index().loc[1:6]
2 c
3 b
4 e
5 d
dtype: object
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:820px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcde'%29,%20index%3D%5B0,%203,%202,%205,%204%5D%29%0As.sort_index%28%29.loc%5B1%3A6%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
However, if at least one of the two is absent and the index is not sorted, an error will be raised (since doing otherwise would be computationally expensive, as well as potentially ambiguous for mixed-type indexes). For instance, in the above example, `s.loc[1:6]` would raise a `KeyError`.
s = pd.Series(list('abcdef'), index=[0, 3, 2, 5, 4, 2])
s.loc[3:5]
3 b
2 c
5 d
dtype: object
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:650px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0As%20%3D%20pd.Series%28list%28'abcdef'%29,%20index%3D%5B0,%203,%202,%205,%204,%202%5D%29%0As.loc%5B3%3A5%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
Also, if the index has duplicate labels and either the start or the stop label is duplicated, an error will be raised. For instance, in the above example, `s.loc[2:5]` would raise a `KeyError`.
5.4.2.4. Selection by position#
Whether a copy or a reference is returned for a setting operation may depend on the context. This is sometimes called chained assignment and should be avoided.
Pandas provides a suite of methods in order to get purely integer-based indexing. The semantics closely follow Python and NumPy slicing. Indexing is 0-based. When slicing, the start bound is included, while the upper bound is excluded. Trying to use a non-integer, even a valid label, will raise an `IndexError`.
The `.iloc` attribute is the primary access method. The following are valid inputs:
- An integer, e.g. `5`.
- A list or array of integers, e.g. `[4, 3, 0]`.
- A slice object with ints, e.g. `1:7`.
- A boolean array.
- A `callable`.
s1 = pd.Series(np.random.randn(5), index=list(range(0, 10, 2)))
s1
s1.iloc[:3]
0 -0.805919
2 0.428108
4 1.812731
dtype: float64
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:600px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%285%29,%20index%3Dlist%28range%280,%2010,%202%29%29%29%0As1.iloc%5B%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
s1.iloc[3]
1.9849031639081407
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:600px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%285%29,%20index%3Dlist%28range%280,%2010,%202%29%29%29%0As1.iloc%5B%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
Note that setting works as well:
s1.iloc[:3] = 0
s1
0 0.000000
2 0.000000
4 0.000000
6 1.984903
8 0.475023
dtype: float64
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:620px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0As1%20%3D%20pd.Series%28np.random.randn%285%29,%20index%3Dlist%28range%280,%2010,%202%29%29%29%0As1.iloc%5B%3A3%5D%20%3D%200%0As1&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
With a DataFrame, select via integer slicing:
df1 = pd.DataFrame(np.random.randn(6, 4),
index=list(range(0, 12, 2)),
columns=list(range(0, 8, 2)))
df1
df1.iloc[:3]
0 | 2 | 4 | 6 | |
---|---|---|---|---|
0 | -0.527504 | 0.054396 | 0.112646 | -0.082599 |
2 | 0.084774 | -0.332679 | -0.528022 | -1.085305 |
4 | -1.828948 | -0.163621 | -0.411630 | -0.927196 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:680px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
df1.iloc[1:5, 2:4]
4 | 6 | |
---|---|---|
2 | -0.528022 | -1.085305 |
4 | -0.411630 | -0.927196 |
6 | 1.496097 | -0.207705 |
8 | -0.239940 | -0.255905 |
Select via integer list:
df1.iloc[[1, 3, 5], [1, 3]]
2 | 6 | |
---|---|---|
2 | -0.332679 | -1.085305 |
6 | -1.243526 | -0.207705 |
10 | 0.019804 | 0.761351 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:700px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B1%3A5,%202%3A4%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
df1.iloc[1:3, :]
0 | 2 | 4 | 6 | |
---|---|---|---|---|
2 | 0.084774 | -0.332679 | -0.528022 | -1.085305 |
4 | -1.828948 | -0.163621 | -0.411630 | -0.927196 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:680px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B1%3A3,%20%3A%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
df1.iloc[:, 1:3]
2 | 4 | |
---|---|---|
0 | 0.054396 | 0.112646 |
2 | -0.332679 | -0.528022 |
4 | -0.163621 | -0.411630 |
6 | -1.243526 | 1.496097 |
8 | -1.219965 | -0.239940 |
10 | 0.019804 | -0.455306 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:700px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B%3A,%201%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
df1.iloc[1, 1] # this is also equivalent to ``df1.iat[1,1]``
-0.3326786237713185
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:680px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B1,%201%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
For getting a cross-section using an integer position (equivalent to `df.xs(1)`):
df1.iloc[1]
0 0.084774
2 -0.332679
4 -0.528022
6 -1.085305
Name: 2, dtype: float64
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:700px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28range%280,%2012,%202%29%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28range%280,%208,%202%29%29%29%0Adf1%0Adf1.iloc%5B1%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
Out-of-range slice indexes are handled gracefully just as in Python/NumPy.
x = list('abcdef') # these are allowed in Python/NumPy.
x
['a', 'b', 'c', 'd', 'e', 'f']
x[4:10]
['e', 'f']
x[8:10]
[]
s = pd.Series(x)
s
0 a
1 b
2 c
3 d
4 e
5 f
dtype: object
s.iloc[4:10]
4 e
5 f
dtype: object
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:600px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Ax%20%3D%20list%28'abcdef'%29%20%0As%20%3D%20pd.Series%28x%29%0As.iloc%5B4%3A10%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
s.iloc[8:10]
Series([], dtype: object)
Note that using slices that go out of bounds can result in an empty axis (e.g. an empty DataFrame being returned).
dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB'))
dfl.iloc[:, 2:3]
0 |
---|
1 |
2 |
3 |
4 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:630px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%202%29,%20columns%3Dlist%28'AB'%29%29%0Adfl%0Adfl.iloc%5B%3A,%202%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
dfl.iloc[:, 1:3]
B | |
---|---|
0 | -1.035027 |
1 | -1.430560 |
2 | 0.642986 |
3 | 0.487318 |
4 | -2.004848 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:630px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%202%29,%20columns%3Dlist%28'AB'%29%29%0Adfl.iloc%5B%3A,%201%3A3%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
dfl.iloc[4:6]
A | B | |
---|---|---|
4 | -0.323008 | -2.004848 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:600px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adfl%20%3D%20pd.DataFrame%28np.random.randn%285,%202%29,%20columns%3Dlist%28'AB'%29%29%0Adfl.iloc%5B4%3A6%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
A single indexer that is out of bounds will raise an `IndexError`. A list of indexers where any element is out of bounds will raise an `IndexError`.
dfl.iloc[[4, 5, 6]]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1587, in _iLocIndexer._get_list_axis(self, key, axis)
1586 try:
-> 1587 return self.obj._take_with_is_copy(key, axis=axis)
1588 except IndexError as err:
1589 # re-raise with different error message
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/generic.py:3902, in NDFrame._take_with_is_copy(self, indices, axis)
3895 """
3896 Internal version of the `take` method that sets the `_is_copy`
3897 attribute to keep track of the parent dataframe (using in indexing
(...)
3900 See the docstring of `take` for full explanation of the parameters.
3901 """
-> 3902 result = self._take(indices=indices, axis=axis)
3903 # Maybe set copy if we didn't actually change the index.
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/generic.py:3886, in NDFrame._take(self, indices, axis, convert_indices)
3884 self._consolidate_inplace()
-> 3886 new_data = self._mgr.take(
3887 indices,
3888 axis=self._get_block_manager_axis(axis),
3889 verify=True,
3890 convert_indices=convert_indices,
3891 )
3892 return self._constructor(new_data).__finalize__(self, method="take")
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/internals/managers.py:977, in BaseBlockManager.take(self, indexer, axis, verify, convert_indices)
976 if convert_indices:
--> 977 indexer = maybe_convert_indices(indexer, n, verify=verify)
979 new_labels = self.axes[axis].take(indexer)
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexers/utils.py:286, in maybe_convert_indices(indices, n, verify)
285 if mask.any():
--> 286 raise IndexError("indices are out-of-bounds")
287 return indices
IndexError: indices are out-of-bounds
The above exception was the direct cause of the following exception:
IndexError Traceback (most recent call last)
Cell In[67], line 1
----> 1 dfl.iloc[[4, 5, 6]]
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key)
1070 axis = self.axis or 0
1072 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073 return self._getitem_axis(maybe_callable, axis=axis)
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1616, in _iLocIndexer._getitem_axis(self, key, axis)
1614 # a list of integers
1615 elif is_list_like_indexer(key):
-> 1616 return self._get_list_axis(key, axis=axis)
1618 # a single integer
1619 else:
1620 key = item_from_zerodim(key)
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1590, in _iLocIndexer._get_list_axis(self, key, axis)
1587 return self.obj._take_with_is_copy(key, axis=axis)
1588 except IndexError as err:
1589 # re-raise with different error message
-> 1590 raise IndexError("positional indexers are out-of-bounds") from err
IndexError: positional indexers are out-of-bounds
dfl.iloc[:, 4]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In[68], line 1
----> 1 dfl.iloc[:, 4]
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1067, in _LocationIndexer.__getitem__(self, key)
1065 if self._is_scalar_access(key):
1066 return self.obj._get_value(*key, takeable=self._takeable)
-> 1067 return self._getitem_tuple(key)
1068 else:
1069 # we by definition only have the 0th axis
1070 axis = self.axis or 0
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1563, in _iLocIndexer._getitem_tuple(self, tup)
1561 def _getitem_tuple(self, tup: tuple):
-> 1563 tup = self._validate_tuple_indexer(tup)
1564 with suppress(IndexingError):
1565 return self._getitem_lowerdim(tup)
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:873, in _LocationIndexer._validate_tuple_indexer(self, key)
871 for i, k in enumerate(key):
872 try:
--> 873 self._validate_key(k, i)
874 except ValueError as err:
875 raise ValueError(
876 "Location based indexing can only have "
877 f"[{self._valid_types}] types"
878 ) from err
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1466, in _iLocIndexer._validate_key(self, key, axis)
1464 return
1465 elif is_integer(key):
-> 1466 self._validate_integer(key, axis)
1467 elif isinstance(key, tuple):
1468 # a tuple should already have been caught by this point
1469 # so don't treat a tuple as a valid indexer
1470 raise IndexingError("Too many indexers")
File /usr/share/miniconda/envs/open-machine-learning-jupyter-book/lib/python3.9/site-packages/pandas/core/indexing.py:1557, in _iLocIndexer._validate_integer(self, key, axis)
1555 len_axis = len(self.obj._get_axis(axis))
1556 if key >= len_axis or key < -len_axis:
-> 1557 raise IndexError("single positional indexer is out-of-bounds")
IndexError: single positional indexer is out-of-bounds
5.4.2.5. Selection by callable#
`.loc`, `.iloc`, and also `[]` indexing can accept a `callable` as indexer. The `callable` must be a function with one argument (the calling Series or DataFrame) that returns valid output for indexing.
df1 = pd.DataFrame(np.random.randn(6, 4),
index=list('abcdef'),
columns=list('ABCD'))
df1
df1.loc[lambda df: df['A'] > 0, :]
A | B | C | D | |
---|---|---|---|---|
b | 1.601640 | 0.785938 | 0.559648 | 1.609879 |
c | 0.298113 | -0.420427 | 0.918680 | 0.436950 |
d | 0.961635 | -0.252276 | 1.326258 | -1.436899 |
e | 0.192715 | 0.390917 | -0.232632 | 0.392365 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:700px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%20%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%0Adf1.loc%5Blambda%20df%3A%20df%5B'A'%5D%20%3E%200,%20%3A%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
df1.loc[:, lambda df: ['A', 'B']]
A | B | |
---|---|---|
a | -0.553925 | 0.516329 |
b | 1.601640 | 0.785938 |
c | 0.298113 | -0.420427 |
d | 0.961635 | -0.252276 |
e | 0.192715 | 0.390917 |
f | -0.963618 | 0.796912 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:700px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1.loc%5B%3A,%20lambda%20df%3A%20%5B'A',%20'B'%5D%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
df1.iloc[:, lambda df: [0, 1]]
A | B | |
---|---|---|
a | -0.553925 | 0.516329 |
b | 1.601640 | 0.785938 |
c | 0.298113 | -0.420427 |
d | 0.961635 | -0.252276 |
e | 0.192715 | 0.390917 |
f | -0.963618 | 0.796912 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:700px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1.iloc%5B%3A,%20lambda%20df%3A%20%5B0,%201%5D%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
df1[lambda df: df.columns[0]]
a -0.553925
b 1.601640
c 0.298113
d 0.961635
e 0.192715
f -0.963618
Name: A, dtype: float64
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:650px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%5Blambda%20df%3A%20df.columns%5B0%5D%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
You can use callable indexing in `Series`.
df1['A'].loc[lambda s: s > 0]
b 1.601640
c 0.298113
d 0.961635
e 0.192715
Name: A, dtype: float64
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:920px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Aimport%20numpy%20as%20np%0Aimport%20random%0Anp.random.seed%2810%29%0Adf1%20%3D%20pd.DataFrame%28np.random.randn%286,%204%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abcdef'%29,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20columns%3Dlist%28'ABCD'%29%29%0Adf1%5B'A'%5D.loc%5Blambda%20s%3A%20s%20%3E%200%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
5.4.2.5.1. Combining positional and label-based indexing#
If you wish to get the 0th and the 2nd elements from the index in the `'A'` column, you can do:
dfd = pd.DataFrame({'A': [1, 2, 3],
'B': [4, 5, 6]},
index=list('abc'))
dfd
dfd.loc[dfd.index[[0, 2]], 'A']
a 1
c 3
Name: A, dtype: int64
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:550px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Adfd%20%3D%20pd.DataFrame%28%7B'A'%3A%20%5B1,%202,%203%5D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20'B'%3A%20%5B4,%205,%206%5D%7D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abc'%29%29%0Adfd.loc%5Bdfd.index%5B%5B0,%202%5D%5D,%20'A'%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
This can also be expressed using `.iloc`, by explicitly getting locations on the indexers, and using positional indexing to select things.
dfd.iloc[[0, 2], dfd.columns.get_loc('A')]
a 1
c 3
Name: A, dtype: int64
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:550px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Adfd%20%3D%20pd.DataFrame%28%7B'A'%3A%20%5B1,%202,%203%5D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20'B'%3A%20%5B4,%205,%206%5D%7D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abc'%29%29%0Adfd.iloc%5B%5B0,%202%5D,%20dfd.columns.get_loc%28'A'%29%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
To get multiple indexers, use `.get_indexer`:
dfd.iloc[[0, 2], dfd.columns.get_indexer(['A', 'B'])]
A | B | |
---|---|---|
a | 1 | 4 |
c | 3 | 6 |
from IPython.display import HTML
display(
HTML(
"""
<div class='full-width docutils' >
<div class="admonition note pandastutor" name="html-admonition" style="margin-right:20%">
<p class="admonition-title pandastutor">Let's visualize it! 🎥</p>
<div class="pandastutor inner" style="height:550px;">
<iframe frameborder="0" scrolling="no" src="https://pandastutor.com/vis.html#code=import%20pandas%20as%20pd%0Aimport%20io%0Adfd%20%3D%20pd.DataFrame%28%7B'A'%3A%20%5B1,%202,%203%5D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20'B'%3A%20%5B4,%205,%206%5D%7D,%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20index%3Dlist%28'abc'%29%29%0Adfd.iloc%5B%5B0,%202%5D,%20dfd.columns.get_indexer%28%5B'A',%20'B'%5D%29%5D&d=2023-07-14&lang=py&v=v1"> </iframe>
</div>
</div>
</div>
"""
)
)
Let's visualize it! 🎥
5.4.2.6. Acknowledgments#
Thanks to the Pandas user guide, which contributes the majority of the content in this chapter.