TL;DR:目前而言最简单的实现还是走 Excel 中转一次。
以下是从开发者的角度,通过探索这一功能如何实现,尝试找到解决方案的过程。
Notion 本质上是一个前端 App,那么应该也只能走 Web API 获得剪贴板内容。查阅 MDN 可以找到 Clipboard API,其中有四个方法:read()
, readText()
, write()
, writeText()
。其中带 Text
后缀的仅支持纯文本,不带的则支持更复杂的格式(图片,HTML 片段…)。
注:其实不仅 Notion 有这个问题,其他基于 Electron 的程序,如 Typora 也有多行表格纯文本复制的问题。
然后看看如果从 Excel 中复制内容,实际被写入剪贴板的是什么。这里可以用到 Nirsoft 出品的 InsideClipboard。此处需要补充一些 Windows 小知识:剪贴板中可以存放同一个对象的多种不同格式,应用程序在粘贴时可以按需选择。截图中可以看到 Excel 提供了很多不同的格式,不过实际上浏览器里能拿到的不过只有 text/plain
, text/html
, image/png
三种。既然已经知道直接从 tab 分割的纯文本复制是无法正确恢复多行表格的,图片则更不可能,那么能正确保留多行表格的就只有 text/html
了。为了验证这一猜想,可以在浏览器中打开一个包含 HTML 表格的网页,选中表格部分并复制到 Notion,多行表格也的确成功被保留了,猜想正确。
到了这一步,剩下的思路就很明确了:写个脚本,监听剪贴板中的变化,如果检测到复制了 Tab 分割的纯文本表格,将其转化为 HTML 表格,再写入剪贴板,在 Notion 中就能粘贴了。
一开始我试图用 Javascript 实现,但 Clipboard API 的 write
一直失败,换了多种写法也没找出原因。然后试着用 Python 写了个 proof-of-concept ,暂且算是能用了。理论上用 AHK 应该效果更好一些,或者也可以用 Quicker 的自定义动作。
附录
InsideClipboard 复制 Excel 内容后的截图
测试浏览器能获取的剪贴板格式
navigator.permissions.query({ name: "clipboard-read" }).then((result) => {
if (result.state == "granted" || result.state == "prompt") {
navigator.clipboard.read().then((data) => {
for (let i = 0; i < data.length; i++) {
console.log(`${i} data_type ${data[i].types}`)
// console.log(data[i])
data[i].getType("text/plain").then((blob) => {
blob.text().then(text => {
console.log("text/palin")
console.log(text)
})
})
data[i].getType("text/html").then((blob) => {
blob.text().then(text => {
console.log("text/html")
console.log(text)
})
})
}
});
}
});
Python 的自动转换脚本(多行 Tab 分割的纯文本表格 → HTML 表格)
依赖 pyperclip
和 pywin32
,对剪贴板 HTML 格式操作代码来自于 How to copy HTML code to clipboard using Python? - Stack Overflow
import pyperclip
from time import sleep
# https://stackoverflow.com/questions/55698762/how-to-copy-html-code-to-clipboard-using-python
"""
Edit on Jan 02, 2020
@author: the_RR
Adapted for python 3.4+
Requires pywin32
original: http://code.activestate.com/recipes/474121/
# HtmlClipboard
# An interface to the "HTML Format" clipboard data format
__author__ = "Phillip Piper (jppx1[at]bigfoot.com)"
__date__ = "2006-02-21"
__version__ = "0.1"
"""
import re
import time
import random
import win32clipboard
#---------------------------------------------------------------------------
# Convenience functions to do the most common operation
def HasHtml():
"""
Return True if there is a Html fragment in the clipboard..
"""
cb = HtmlClipboard()
return cb.HasHtmlFormat()
def GetHtml():
"""
Return the Html fragment from the clipboard or None if there is no Html in the clipboard.
"""
cb = HtmlClipboard()
if cb.HasHtmlFormat():
return cb.GetFragment()
else:
return None
def PutHtml(fragment):
"""
Put the given fragment into the clipboard.
Convenience function to do the most common operation
"""
cb = HtmlClipboard()
cb.PutFragment(fragment)
#---------------------------------------------------------------------------
class HtmlClipboard:
CF_HTML = None
MARKER_BLOCK_OUTPUT = \
"Version:1.0\r\n" \
"StartHTML:%09d\r\n" \
"EndHTML:%09d\r\n" \
"StartFragment:%09d\r\n" \
"EndFragment:%09d\r\n" \
"StartSelection:%09d\r\n" \
"EndSelection:%09d\r\n" \
"SourceURL:%s\r\n"
MARKER_BLOCK_EX = \
"Version:(\S+)\s+" \
"StartHTML:(\d+)\s+" \
"EndHTML:(\d+)\s+" \
"StartFragment:(\d+)\s+" \
"EndFragment:(\d+)\s+" \
"StartSelection:(\d+)\s+" \
"EndSelection:(\d+)\s+" \
"SourceURL:(\S+)"
MARKER_BLOCK_EX_RE = re.compile(MARKER_BLOCK_EX)
MARKER_BLOCK = \
"Version:(\S+)\s+" \
"StartHTML:(\d+)\s+" \
"EndHTML:(\d+)\s+" \
"StartFragment:(\d+)\s+" \
"EndFragment:(\d+)\s+" \
"SourceURL:(\S+)"
MARKER_BLOCK_RE = re.compile(MARKER_BLOCK)
DEFAULT_HTML_BODY = \
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">" \
"<HTML><HEAD></HEAD><BODY><!--StartFragment-->%s<!--EndFragment--></BODY></HTML>"
def __init__(self):
self.html = None
self.fragment = None
self.selection = None
self.source = None
self.htmlClipboardVersion = None
def GetCfHtml(self):
"""
Return the FORMATID of the HTML format
"""
if self.CF_HTML is None:
self.CF_HTML = win32clipboard.RegisterClipboardFormat("HTML Format")
return self.CF_HTML
def GetAvailableFormats(self):
"""
Return a possibly empty list of formats available on the clipboard
"""
formats = []
try:
win32clipboard.OpenClipboard(0)
cf = win32clipboard.EnumClipboardFormats(0)
while (cf != 0):
formats.append(cf)
cf = win32clipboard.EnumClipboardFormats(cf)
finally:
win32clipboard.CloseClipboard()
return formats
def HasHtmlFormat(self):
"""
Return a boolean indicating if the clipboard has data in HTML format
"""
return (self.GetCfHtml() in self.GetAvailableFormats())
def GetFromClipboard(self):
"""
Read and decode the HTML from the clipboard
"""
# implement fix from: http://teachthe.net/?p=1137
cbOpened = False
while not cbOpened:
try:
win32clipboard.OpenClipboard(0)
src = win32clipboard.GetClipboardData(self.GetCfHtml())
src = src.decode("UTF-8")
#print(src)
self.DecodeClipboardSource(src)
cbOpened = True
win32clipboard.CloseClipboard()
except Exception as err:
# If access is denied, that means that the clipboard is in use.
# Keep trying until it's available.
if err.winerror == 5: # Access Denied
pass
# wait on clipboard because something else has it. we're waiting a
# random amount of time before we try again so we don't collide again
time.sleep( random.random()/50 )
elif err.winerror == 1418: # doesn't have board open
pass
elif err.winerror == 0: # open failure
pass
else:
print( 'ERROR in Clipboard section of readcomments: %s' % err)
pass
def DecodeClipboardSource(self, src):
"""
Decode the given string to figure out the details of the HTML that's on the string
"""
# Try the extended format first (which has an explicit selection)
matches = self.MARKER_BLOCK_EX_RE.match(src)
if matches:
self.prefix = matches.group(0)
self.htmlClipboardVersion = matches.group(1)
self.html = src[int(matches.group(2)):int(matches.group(3))]
self.fragment = src[int(matches.group(4)):int(matches.group(5))]
self.selection = src[int(matches.group(6)):int(matches.group(7))]
self.source = matches.group(8)
else:
# Failing that, try the version without a selection
matches = self.MARKER_BLOCK_RE.match(src)
if matches:
self.prefix = matches.group(0)
self.htmlClipboardVersion = matches.group(1)
self.html = src[int(matches.group(2)):int(matches.group(3))]
self.fragment = src[int(matches.group(4)):int(matches.group(5))]
self.source = matches.group(6)
self.selection = self.fragment
def GetHtml(self, refresh=False):
"""
Return the entire Html document
"""
if not self.html or refresh:
self.GetFromClipboard()
return self.html
def GetFragment(self, refresh=False):
"""
Return the Html fragment. A fragment is well-formated HTML enclosing the selected text
"""
if not self.fragment or refresh:
self.GetFromClipboard()
return self.fragment
def GetSelection(self, refresh=False):
"""
Return the part of the HTML that was selected. It might not be well-formed.
"""
if not self.selection or refresh:
self.GetFromClipboard()
return self.selection
def GetSource(self, refresh=False):
"""
Return the URL of the source of this HTML
"""
if not self.selection or refresh:
self.GetFromClipboard()
return self.source
def PutFragment(self, fragment, selection=None, html=None, source=None):
"""
Put the given well-formed fragment of Html into the clipboard.
selection, if given, must be a literal string within fragment.
html, if given, must be a well-formed Html document that textually
contains fragment and its required markers.
"""
if selection is None:
selection = fragment
if html is None:
html = self.DEFAULT_HTML_BODY % fragment
if source is None:
source = "file://HtmlClipboard.py"
fragmentStart = html.index(fragment)
fragmentEnd = fragmentStart + len(fragment)
selectionStart = html.index(selection)
selectionEnd = selectionStart + len(selection)
self.PutToClipboard(html, fragmentStart, fragmentEnd, selectionStart, selectionEnd, source)
def PutToClipboard(self, html, fragmentStart, fragmentEnd, selectionStart, selectionEnd, source="None"):
"""
Replace the Clipboard contents with the given html information.
"""
try:
win32clipboard.OpenClipboard(0)
win32clipboard.EmptyClipboard()
src = self.EncodeClipboardSource(html, fragmentStart, fragmentEnd, selectionStart, selectionEnd, source)
src = src.encode("UTF-8")
#print(src)
win32clipboard.SetClipboardData(self.GetCfHtml(), src)
finally:
win32clipboard.CloseClipboard()
def EncodeClipboardSource(self, html, fragmentStart, fragmentEnd, selectionStart, selectionEnd, source):
"""
Join all our bits of information into a string formatted as per the HTML format specs.
"""
# How long is the prefix going to be?
dummyPrefix = self.MARKER_BLOCK_OUTPUT % (0, 0, 0, 0, 0, 0, source)
lenPrefix = len(dummyPrefix)
prefix = self.MARKER_BLOCK_OUTPUT % (lenPrefix, len(html)+lenPrefix,
fragmentStart+lenPrefix, fragmentEnd+lenPrefix,
selectionStart+lenPrefix, selectionEnd+lenPrefix,
source)
return (prefix + html)
def DumpHtml():
cb = HtmlClipboard()
print("GetAvailableFormats()=%s" % str(cb.GetAvailableFormats()))
print("HasHtmlFormat()=%s" % str(cb.HasHtmlFormat()))
if cb.HasHtmlFormat():
cb.GetFromClipboard()
print("prefix=>>>%s<<<END" % cb.prefix)
print("htmlClipboardVersion=>>>%s<<<END" % cb.htmlClipboardVersion)
print("GetSelection()=>>>%s<<<END" % cb.GetSelection())
print("GetFragment()=>>>%s<<<END" % cb.GetFragment())
print("GetHtml()=>>>%s<<<END" % cb.GetHtml())
print("GetSource()=>>>%s<<<END" % cb.GetSource())
def tsv_to_html_table(tsv):
html_table_lines = ["<table>"]
for line in tsv.split("\n"):
items = line.split("\t")
html_table_line = "".join(["<td>" + item.strip('"') + "</td>" for item in items])
html_table_lines.append(f"<tr>{html_table_line}</tr>")
html_table_lines.append("</table>")
return "\n".join(html_table_lines)
def main():
print("start listening...")
while True:
tsv = pyperclip.paste()
if "\n" in tsv and "\t" in tsv:
# possibly a table?
print("table detected!")
data = tsv_to_html_table(tsv)
PutHtml(data)
if GetHtml() == data:
print("new table copied!")
else:
print("failed")
sleep(1)
if __name__ == "__main__":
main()