From 713a865f611a4388f6f2a5c4c18250a949053695 Mon Sep 17 00:00:00 2001 From: "Liana.Bakradze" Date: Tue, 4 Oct 2016 12:49:25 +0300 Subject: [PATCH] PY-20932 Pandas DataFrame viewer for multiindex frames raises TypeError --- .../pydev/_pydevd_bundle/pydevd_vars.py | 13 +- .../debug/test_dataframe_multiindex.py | 11 ++ .../env/python/PythonDataViewerTest.java | 117 ++++++++++++------ 3 files changed, 94 insertions(+), 47 deletions(-) create mode 100644 python/testData/debug/test_dataframe_multiindex.py diff --git a/python/helpers/pydev/_pydevd_bundle/pydevd_vars.py b/python/helpers/pydev/_pydevd_bundle/pydevd_vars.py index dd9e7e933d99..c2f691421913 100644 --- a/python/helpers/pydev/_pydevd_bundle/pydevd_vars.py +++ b/python/helpers/pydev/_pydevd_bundle/pydevd_vars.py @@ -592,23 +592,20 @@ def dataframe_to_xml(df, name, roffset, coffset, rows, cols, format): xml += "\n" % (rows, cols) format = format.replace('%', '') col_formats = [] + + get_label = lambda label: str(label) if not isinstance(label, tuple) else '/'.join([str(i) for i in label]) + for col in range(cols): - label = df.axes[1].values[col] - if isinstance(label, tuple): - label = '/'.join(label) - label = str(label) dtype = df.dtypes.iloc[col].kind fmt = format if (dtype == 'f' and format) else default_format(dtype) col_formats.append('%' + fmt) bounds = col_bounds[col] xml += '\n' % \ - (str(col), label, dtype, fmt, bounds[1], bounds[0]) + (str(col), get_label(df.axes[1].values[col]), dtype, fmt, bounds[1], bounds[0]) for row, label in enumerate(iter(df.axes[0])): - if isinstance(label, tuple): - label = '/'.join(label) xml += "\n" % \ - (str(row), label) + (str(row), get_label(label)) xml += "\n" xml += "\n" % (rows, cols) for row in range(rows): diff --git a/python/testData/debug/test_dataframe_multiindex.py b/python/testData/debug/test_dataframe_multiindex.py new file mode 100644 index 000000000000..657ad3595458 --- /dev/null +++ b/python/testData/debug/test_dataframe_multiindex.py @@ -0,0 +1,11 @@ +import pandas as pd +import numpy as np + +# multiindex rows +frame1 = pd.DataFrame(data=np.random.randint(0, high=10, size=(4, 2)), columns=['a', 'b'], index=pd.MultiIndex([['s', 'd'], [2, 3]], [[0, 0, 1, 1], [0, 1, 0, 1]])) +print(frame1) #line 6 + +# multiindex columns +frame2 = pd.DataFrame(np.random.random((4, 4))) +frame2.columns = pd.MultiIndex.from_product([[1, 2], [1, 'B']]) +print(frame2) # line 11 \ No newline at end of file diff --git a/python/testSrc/com/jetbrains/env/python/PythonDataViewerTest.java b/python/testSrc/com/jetbrains/env/python/PythonDataViewerTest.java index d988bb361686..c818157803c3 100644 --- a/python/testSrc/com/jetbrains/env/python/PythonDataViewerTest.java +++ b/python/testSrc/com/jetbrains/env/python/PythonDataViewerTest.java @@ -16,6 +16,8 @@ package com.jetbrains.env.python; import com.google.common.collect.ImmutableSet; +import com.intellij.util.Consumer; +import com.intellij.xdebugger.XDebugSession; import com.intellij.xdebugger.XDebuggerTestUtil; import com.jetbrains.env.PyEnvTestCase; import com.jetbrains.env.Staging; @@ -23,10 +25,12 @@ import com.jetbrains.env.python.debug.PyDebuggerTask; import com.jetbrains.python.debugger.ArrayChunk; import com.jetbrains.python.debugger.PyDebugValue; import com.jetbrains.python.debugger.PyDebuggerException; -import com.jetbrains.python.debugger.array.AsyncArrayTableModel; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; import org.junit.Test; +import java.lang.reflect.InvocationTargetException; +import java.util.List; import java.util.Set; import static com.intellij.testFramework.UsefulTestCase.assertSameElements; @@ -40,53 +44,88 @@ public class PythonDataViewerTest extends PyEnvTestCase { @Test @Staging public void testDataFrameChunkRetrieval() throws Exception { - runPythonTest(new PyDebuggerTask("/debug", "test_dataframe.py") { + runPythonTest(new PyDataFrameDebuggerTask(getRelativeTestDataPath(), "test_dataframe.py", ImmutableSet.of(7, 15, 22)) { @Override - public void before() throws Exception { - toggleBreakpoint(getScriptName(), 7); - toggleBreakpoint(getScriptName(), 15); - toggleBreakpoint(getScriptName(), 22); + public void testing() throws Exception { + doTest("df1", 3, 5, null); + + doTest("df2", 3, 6, arrayChunk -> { + List colHeaders = arrayChunk.getColHeaders(); + assertSameElements(colHeaders.stream().map(ArrayChunk.ColHeader::getLabel).toArray(), + "LABELS", "One_X", "One_Y", "Two_X", "Two_Y", "row"); + }); + + doTest("df3", 7, 3, arrayChunk -> { + ArrayChunk.ColHeader header = arrayChunk.getColHeaders().get(2); + assertEquals("Sales", header.getLabel()); + assertEquals(16, (int)Integer.valueOf(header.getMax())); + assertEquals(1, (int)Integer.valueOf(header.getMin())); + }); } + }); + } + @Test + @Staging + public void testMultiIndexDataFrame() throws Exception { + runPythonTest(new PyDataFrameDebuggerTask(getRelativeTestDataPath(), "test_dataframe_multiindex.py", ImmutableSet.of(5, 10)) { @Override public void testing() throws Exception { - waitForPause(); - ArrayChunk df1 = getDefaultChunk("df1"); - assertEquals(5, df1.getColumns()); - assertEquals(3, df1.getRows()); - resume(); - - waitForPause(); - ArrayChunk df2 = getDefaultChunk("df2"); - assertEquals(6, df2.getColumns()); - assertEquals(3, df2.getRows()); - assertSameElements(df2.getColHeaders().stream().map((header -> header.getLabel())).toArray(), - new String[]{"LABELS", "One_X", "One_Y", "Two_X", "Two_Y", "row"}); - resume(); - - waitForPause(); - ArrayChunk df3 = getDefaultChunk("df3"); - assertEquals(3, df3.getColumns()); - assertEquals(7, df3.getRows()); - ArrayChunk.ColHeader header = df3.getColHeaders().get(2); - assertEquals("Sales", header.getLabel()); - assertEquals(16, (int)Integer.valueOf(header.getMax())); - assertEquals(1, (int)Integer.valueOf(header.getMin())); - resume(); + doTest("frame1", 4, 2, arrayChunk -> assertSameElements(arrayChunk.getRowLabels(), + "s/2", "s/3", "d/2", "d/3")); + doTest("frame2", 4, 4, arrayChunk -> { + List headers = arrayChunk.getColHeaders(); + assertSameElements(headers.stream().map(ArrayChunk.ColHeader::getLabel).toArray(), "1/1", "1/B", "2/1", "2/B"); + }); } + }); + } - private ArrayChunk getDefaultChunk(String varName) throws PyDebuggerException { - PyDebugValue dbgVal = (PyDebugValue)XDebuggerTestUtil.evaluate(mySession, varName).first; - return dbgVal.getFrameAccessor() - .getArrayItems(dbgVal, 0, 0, -1, -1, ".%5f"); - } + private static class PyDataFrameDebuggerTask extends PyDebuggerTask { + private Set myLines; - @NotNull - @Override - public Set getTags() { - return ImmutableSet.of("pandas"); + public PyDataFrameDebuggerTask(@Nullable String relativeTestDataPath, String scriptName, Set lines) { + super(relativeTestDataPath, scriptName); + myLines = lines; + } + + protected void testShape(ArrayChunk arrayChunk, int expectedRows, int expectedColumns) { + assertEquals(expectedRows, arrayChunk.getRows()); + assertEquals(expectedColumns, arrayChunk.getColumns()); + } + + protected void doTest(String name, int expectedRows, int expectedColumns, @Nullable Consumer test) + throws InvocationTargetException, InterruptedException, PyDebuggerException { + waitForPause(); + ArrayChunk arrayChunk = getDefaultChunk(name, mySession); + testShape(arrayChunk, expectedRows, expectedColumns); + if (test != null) { + test.consume(arrayChunk); } - }); + resume(); + } + + @Override + public void before() throws Exception { + for (Integer line : myLines) { + toggleBreakpoint(getScriptName(), line); + } + } + + @NotNull + @Override + public Set getTags() { + return ImmutableSet.of("pandas"); + } + } + + private static ArrayChunk getDefaultChunk(String varName, XDebugSession session) throws PyDebuggerException { + PyDebugValue dbgVal = (PyDebugValue)XDebuggerTestUtil.evaluate(session, varName).first; + return dbgVal.getFrameAccessor().getArrayItems(dbgVal, 0, 0, -1, -1, ".%5f"); + } + + private static String getRelativeTestDataPath() { + return "/debug"; } } -- 2.23.3