1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | def insertDialog(db, documents): cur = db.cursor() deleteQaSql = "DELETE FROM qa" try : cur.execute(deleteQaSql) db.commit() except Exception as e: print ( "Error %s while execute: %s" % (e, deleteQaSql)) 。。。 try : cur.execute(executeSql) db.commit() except Exception as e: print ( "Error %s for execute sql: %s" % (e, executeSql)) db.rollback() curId + = 1 continue dialogList = mongoDialogs() insertDialog(db, dialogList) db.close() |
出错:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | File "/Users/crifan/dev/dev_root/xxx/nlp/dialog/data/MongodbToMysql.py" , line 286, in <module> main() File "/Users/crifan/dev/dev_root/xxx/nlp/dialog/data/MongodbToMysql.py" , line 282, in main insertDialog(db, dialogList) File "/Users/crifan/dev/dev_root/xxx/nlp/dialog/data/MongodbToMysql.py" , line 129, in insertDialog for document in documents: File "/Users/crifan/dev/dev_root/xxx/nlp/dialog/data/MongodbToMysql.py" , line 69, in mongoDialogs for document in collection. find (): File "/Users/crifan/.virtualenvs/xx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py" , line 1169, in next if len(self.__data) or self._refresh(): File "/Users/crifan/.virtualenvs/xx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py" , line 1106, in _refresh self.__send_message(g) File "/Users/crifan/.virtualenvs/xx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py" , line 975, in __send_message helpers._check_command_response(first) File "/Users/crifan/.virtualenvs/xx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/helpers.py" , line 142, in _check_command_response raise CursorNotFound(errmsg, code, response) pymongo.errors.CursorNotFound: Cursor not found, cursor id : 31763154712 |
python – pymongo.errors.CursorNotFound: cursor id ‘…’ not valid at server – Stack Overflow
超时导致的?
估计是我debug时,暂停调试的时间太长,导致cursor失效?
pymongo.errors.CursorNotFound: Cursor not found – 简书
pymongo.errors.CursorNotFound: Cursor not found, cursor id: 124891571478 – CSDN博客
【pymongo】mongodb cursor id not valid error – 匡子语 – 博客园
重新去运行,不debug了,看看连续执行、确保在10分钟内处理完的话,是否还会超时
然后后来由于后续代码运行时间太长,也没法,不方便验证是否正常。
总之知道了是这个问题就好了。
后续如果再遇到,可以去设置游标(cursor)永远不超时即可。
【后记 20180629】
后来本地连接Mongo调试又出现同样错误:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 | File "/Users/crifan/dev/dev_root/xxx/processData/mysqlQa/mongoDialogToMysqlQa.py" , line 262, in insertDialog for document in dialogGenerator: File "/Users/crifan/dev/dev_root/xxx/processData/mysqlQa/mongoDialogToMysqlQa.py" , line 109, in mongoDialogGenerator for eachDialog in dialogCollection. find (): File "/Users/crifan/.virtualenvs/xxx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py" , line 1169, in next if len(self.__data) or self._refresh(): File "/Users/crifan/.virtualenvs/xxx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py" , line 1106, in _refresh self.__send_message(g) File "/Users/crifan/.virtualenvs/xxx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py" , line 975, in __send_message helpers._check_command_response(first) File "/Users/crifan/.virtualenvs/xxx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/helpers.py" , line 142, in _check_command_response raise CursorNotFound(errmsg, code, response) pymongo.errors.CursorNotFound: cursor id 31095472518 not found |
很明显是:
mongo的游标(cursor)超时了。
所以必须是要去设置超时时间(增加,或者永久不超时)
pymongo.errors.CursorNotFound: cursor id not found
python – pymongo.errors.CursorNotFound: cursor id ‘…’ not valid at server – Stack Overflow
此处调试期间,干脆简单点去:
timeout=False
no_cursor_timeout=True
之类的吧
python – pymongo.errors.CursorNotFound: cursor id ‘…’ not found at server – Stack Overflow
mongodb – CursorNotFound error in Pymongo – Stack Overflow
pymongo.errors.CursorNotFound: Cursor not found – 简书
pymongo.errors.CursorNotFound: Cursor not found, cursor id: 124891571478 – CSDN博客
mongodb pymongo.errors.CursorNotFound: Cursor not found, cursor id: 82792803897 – shaomine – 博客园
【pymongo】mongodb cursor id not valid error – 匡子语 – 博客园
改为:
1 2 3 4 5 | # for eachDialog in dialogCollection.find(): cursor = dialogCollection.find(no_cursor_timeout = True ) for eachDialog in cursor: yield eachDialog cursor.close() |
看看效果
抽空再完整跑一遍。
最后确定了,是可以的:
运行了半天(4,5个小时)中间没有再出现错误。
如果不行,到时候再去改为:
1 | .batch_size( 1 ) |
试试
或者参考:
https://www.cnblogs.com/dplearning/p/6052291.html
试试:
1 | .find(timeout = False ) |
【总结】
此处pymongo报错:
pymongo.errors.CursorNotFound: Cursor not found
原因是:
此处的:
1 | for eachDialog in dialogCollection.find(): |
find会返回多个数据
而处理这些数据期间,游标在服务器端的空闲时间(两次向服务器取数据之间的间隔)超过了默认的最长时间10分钟,游标被服务器回收,所以超时报错了。
解决办法是:
取消游标的超时限制(让游标永不超时),改为:
1 2 3 4 5 | # for eachDialog in dialogCollection.find(): cursor = dialogCollection.find(no_cursor_timeout = True ) for eachDialog in cursor: yield eachDialog cursor.close() |
即可。
【后记20210831】
根据官网最新文档:
cursor.noCursorTimeout() — MongoDB Manual
单独使用 no_cursor_timeout=True
已经不够了:游标所属的会话(session)空闲超过30分钟后会过期,此时即使设置了 no_cursor_timeout,游标仍然会被服务器关闭。
应该改为:
- 先新建一个会话,后续所有操作,都关联上、使用,此会话。
- 每隔一段时间,再去刷新会话
示例代码:
import logging
from datetime import datetime

import pymongo

# Refresh the session once every 8 minutes.
# Note: this must stay below 30 minutes, the default MongoDB session timeout.
RefreshSessionPerSeconds = 8 * 60


def mergeHistorResultToNewCollection():
    """Iterate over every document of the old collection inside one session.

    The cursor is opened with ``no_cursor_timeout=True`` and bound to an
    explicit session; the session is kept alive by issuing the
    ``refreshSessions`` command whenever more than
    ``RefreshSessionPerSeconds`` seconds have passed since the last refresh.

    NOTE(review): relies on a module-level ``mongoClient`` that is not defined
    in this snippet -- presumably a ``pymongo.MongoClient``; confirm.
    """
    # The context manager ends the session even if processing raises.
    with mongoClient.start_session() as mongoSession:
        mongoSessionId = mongoSession.session_id
        mongoDb = mongoSession.client["your_db_name"]
        mongoCollectionOld = mongoDb["collecion_old"]
        mongoCollectionNew = mongoDb['collecion_new']

        historyAllResultCursor = mongoCollectionOld.find(
            no_cursor_timeout=True, session=mongoSession)
        try:
            lastUpdateTime = datetime.now()

            for curIdx, oldHistoryResult in enumerate(historyAllResultCursor):
                curTime = datetime.now()
                elapsedTimeSeconds = (curTime - lastUpdateTime).total_seconds()

                if elapsedTimeSeconds > RefreshSessionPerSeconds:
                    lastUpdateTime = curTime
                    # Keep the session (and therefore its cursor) alive.
                    cmdResp = mongoDb.command(
                        "refreshSessions", [mongoSessionId],
                        session=mongoSession)
                    logging.info(
                        "Called refreshSessions command, resp=%s", cmdResp)

                # do what you want
        finally:
            # A no-timeout cursor is not reaped by the server on its own, so
            # always close it explicitly.
            historyAllResultCursor.close()
相关参考文档:
collection – Collection level operations — PyMongo 3.12.0 documentation
database – Database level operations — PyMongo 3.12.0 documentation
refreshSessions — MongoDB Manual
cursor.noCursorTimeout() — MongoDB Manual
详细解释,可参考我回复的帖子:
node.js – MongoDB – Error: getMore command failed: Cursor not found – Stack Overflow
转载请注明:在路上 » 【已解决】python中mongo出错:pymongo.errors.CursorNotFound: Cursor not found