/*- * Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * Copyright (c) 1992, 1993, 1994, 1995, 1996 * Keith Bostic. All rights reserved. * * See the LICENSE file for redistribution information. */ #include "config.h" #ifndef lint static const char sccsid[] = "@(#)v_word.c 10.5 (Berkeley) 3/6/96"; #endif /* not lint */ #include #include #include #include #include #include #include #include "../common/common.h" #include "vi.h" /* * There are two types of "words". Bigwords are easy -- groups of anything * delimited by whitespace. Normal words are trickier. They are either a * group of characters, numbers and underscores, or a group of anything but, * delimited by whitespace. When for a word, if you're in whitespace, it's * easy, just remove the whitespace and go to the beginning or end of the * word. Otherwise, figure out if the next character is in a different group. * If it is, go to the beginning or end of that group, otherwise, go to the * beginning or end of the current group. The historic version of vi didn't * get this right, so, for example, there were cases where "4e" was not the * same as "eeee" -- in particular, single character words, and commands that * began in whitespace were almost always handled incorrectly. To get it right * you have to resolve the cursor after each search so that the look-ahead to * figure out what type of "word" the cursor is in will be correct. * * Empty lines, and lines that consist of only white-space characters count * as a single word, and the beginning and end of the file counts as an * infinite number of words. * * Movements associated with commands are different than movement commands. * For example, in "abc def", with the cursor on the 'a', "cw" is from * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white * space is discarded from the change movement. Another example is that, * in the same string, a "cw" on any white space character replaces that * single character, and nothing else. Ain't nothin' in here that's easy. * * One historic note -- in the original vi, the 'w', 'W' and 'B' commands * would treat groups of empty lines as individual words, i.e. the command * would move the cursor to each new empty line. The 'e' and 'E' commands * would treat groups of empty lines as a single word, i.e. the first use * would move past the group of lines. The 'b' command would just beep at * you, or, if you did it from the start of the line as part of a motion * command, go absolutely nuts. If the lines contained only white-space * characters, the 'w' and 'W' commands would just beep at you, and the 'B', * 'b', 'E' and 'e' commands would treat the group as a single word, and * the 'B' and 'b' commands will treat the lines as individual words. This * implementation treats all of these cases as a single white-space word. */ enum which {BIGWORD, LITTLEWORD}; static int bword __P((SCR *, VICMD *, enum which)); static int eword __P((SCR *, VICMD *, enum which)); static int fword __P((SCR *, VICMD *, enum which)); /* * v_wordW -- [count]W * Move forward a bigword at a time. * * PUBLIC: int v_wordW __P((SCR *, VICMD *)); */ int v_wordW(sp, vp) SCR *sp; VICMD *vp; { return (fword(sp, vp, BIGWORD)); } /* * v_wordw -- [count]w * Move forward a word at a time. * * PUBLIC: int v_wordw __P((SCR *, VICMD *)); */ int v_wordw(sp, vp) SCR *sp; VICMD *vp; { return (fword(sp, vp, LITTLEWORD)); } /* * fword -- * Move forward by words. */ static int fword(sp, vp, type) SCR *sp; VICMD *vp; enum which type; { enum { INWORD, NOTWORD } state; VCS cs; u_long cnt; cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; cs.cs_lno = vp->m_start.lno; cs.cs_cno = vp->m_start.cno; if (cs_init(sp, &cs)) return (1); /* * If in white-space: * If the count is 1, and it's a change command, we're done. * Else, move to the first non-white-space character, which * counts as a single word move. If it's a motion command, * don't move off the end of the line. */ if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) { if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { if (ISCMD(vp->rkp, 'c')) return (0); if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { if (cs_fspace(sp, &cs)) return (1); goto ret; } } if (cs_fblank(sp, &cs)) return (1); --cnt; } /* * Cyclically move to the next word -- this involves skipping * over word characters and then any trailing non-word characters. * Note, for the 'w' command, the definition of a word keeps * switching. */ if (type == BIGWORD) while (cnt--) { for (;;) { if (cs_next(sp, &cs)) return (1); if (cs.cs_flags == CS_EOF) goto ret; if (cs.cs_flags != 0 || isblank(cs.cs_ch)) break; } /* * If a motion command and we're at the end of the * last word, we're done. Delete and yank eat any * trailing blanks, but we don't move off the end * of the line regardless. */ if (cnt == 0 && ISMOTION(vp)) { if ((ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) && cs_fspace(sp, &cs)) return (1); break; } /* Eat whitespace characters. */ if (cs_fblank(sp, &cs)) return (1); if (cs.cs_flags == CS_EOF) goto ret; } else while (cnt--) { state = cs.cs_flags == 0 && inword(cs.cs_ch) ? INWORD : NOTWORD; for (;;) { if (cs_next(sp, &cs)) return (1); if (cs.cs_flags == CS_EOF) goto ret; if (cs.cs_flags != 0 || isblank(cs.cs_ch)) break; if (state == INWORD) { if (!inword(cs.cs_ch)) break; } else if (inword(cs.cs_ch)) break; } /* See comment above. */ if (cnt == 0 && ISMOTION(vp)) { if ((ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) && cs_fspace(sp, &cs)) return (1); break; } /* Eat whitespace characters. */ if (cs.cs_flags != 0 || isblank(cs.cs_ch)) if (cs_fblank(sp, &cs)) return (1); if (cs.cs_flags == CS_EOF) goto ret; } /* * If we didn't move, we must be at EOF. * * !!! * That's okay for motion commands, however. */ ret: if (!ISMOTION(vp) && cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { v_eof(sp, &vp->m_start); return (1); } /* Adjust the end of the range for motion commands. */ vp->m_stop.lno = cs.cs_lno; vp->m_stop.cno = cs.cs_cno; if (ISMOTION(vp) && cs.cs_flags == 0) --vp->m_stop.cno; /* * Non-motion commands move to the end of the range. Delete * and yank stay at the start, ignore others. */ vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; return (0); } /* * v_wordE -- [count]E * Move forward to the end of the bigword. * * PUBLIC: int v_wordE __P((SCR *, VICMD *)); */ int v_wordE(sp, vp) SCR *sp; VICMD *vp; { return (eword(sp, vp, BIGWORD)); } /* * v_worde -- [count]e * Move forward to the end of the word. * * PUBLIC: int v_worde __P((SCR *, VICMD *)); */ int v_worde(sp, vp) SCR *sp; VICMD *vp; { return (eword(sp, vp, LITTLEWORD)); } /* * eword -- * Move forward to the end of the word. */ static int eword(sp, vp, type) SCR *sp; VICMD *vp; enum which type; { enum { INWORD, NOTWORD } state; VCS cs; u_long cnt; cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; cs.cs_lno = vp->m_start.lno; cs.cs_cno = vp->m_start.cno; if (cs_init(sp, &cs)) return (1); /* * !!! * If in whitespace, or the next character is whitespace, move past * it. (This doesn't count as a word move.) Stay at the character * past the current one, it sets word "state" for the 'e' command. */ if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { if (cs_next(sp, &cs)) return (1); if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) goto start; } if (cs_fblank(sp, &cs)) return (1); /* * Cyclically move to the next word -- this involves skipping * over word characters and then any trailing non-word characters. * Note, for the 'e' command, the definition of a word keeps * switching. */ start: if (type == BIGWORD) while (cnt--) { for (;;) { if (cs_next(sp, &cs)) return (1); if (cs.cs_flags == CS_EOF) goto ret; if (cs.cs_flags != 0 || isblank(cs.cs_ch)) break; } /* * When we reach the start of the word after the last * word, we're done. If we changed state, back up one * to the end of the previous word. */ if (cnt == 0) { if (cs.cs_flags == 0 && cs_prev(sp, &cs)) return (1); break; } /* Eat whitespace characters. */ if (cs_fblank(sp, &cs)) return (1); if (cs.cs_flags == CS_EOF) goto ret; } else while (cnt--) { state = cs.cs_flags == 0 && inword(cs.cs_ch) ? INWORD : NOTWORD; for (;;) { if (cs_next(sp, &cs)) return (1); if (cs.cs_flags == CS_EOF) goto ret; if (cs.cs_flags != 0 || isblank(cs.cs_ch)) break; if (state == INWORD) { if (!inword(cs.cs_ch)) break; } else if (inword(cs.cs_ch)) break; } /* See comment above. */ if (cnt == 0) { if (cs.cs_flags == 0 && cs_prev(sp, &cs)) return (1); break; } /* Eat whitespace characters. */ if (cs.cs_flags != 0 || isblank(cs.cs_ch)) if (cs_fblank(sp, &cs)) return (1); if (cs.cs_flags == CS_EOF) goto ret; } /* * If we didn't move, we must be at EOF. * * !!! * That's okay for motion commands, however. */ ret: if (!ISMOTION(vp) && cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { v_eof(sp, &vp->m_start); return (1); } /* Set the end of the range for motion commands. */ vp->m_stop.lno = cs.cs_lno; vp->m_stop.cno = cs.cs_cno; /* * Non-motion commands move to the end of the range. * Delete and yank stay at the start, ignore others. */ vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; return (0); } /* * v_WordB -- [count]B * Move backward a bigword at a time. * * PUBLIC: int v_wordB __P((SCR *, VICMD *)); */ int v_wordB(sp, vp) SCR *sp; VICMD *vp; { return (bword(sp, vp, BIGWORD)); } /* * v_wordb -- [count]b * Move backward a word at a time. * * PUBLIC: int v_wordb __P((SCR *, VICMD *)); */ int v_wordb(sp, vp) SCR *sp; VICMD *vp; { return (bword(sp, vp, LITTLEWORD)); } /* * bword -- * Move backward by words. */ static int bword(sp, vp, type) SCR *sp; VICMD *vp; enum which type; { enum { INWORD, NOTWORD } state; VCS cs; u_long cnt; cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; cs.cs_lno = vp->m_start.lno; cs.cs_cno = vp->m_start.cno; if (cs_init(sp, &cs)) return (1); /* * !!! * If in whitespace, or the previous character is whitespace, move * past it. (This doesn't count as a word move.) Stay at the * character before the current one, it sets word "state" for the * 'b' command. */ if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { if (cs_prev(sp, &cs)) return (1); if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) goto start; } if (cs_bblank(sp, &cs)) return (1); /* * Cyclically move to the beginning of the previous word -- this * involves skipping over word characters and then any trailing * non-word characters. Note, for the 'b' command, the definition * of a word keeps switching. */ start: if (type == BIGWORD) while (cnt--) { for (;;) { if (cs_prev(sp, &cs)) return (1); if (cs.cs_flags == CS_SOF) goto ret; if (cs.cs_flags != 0 || isblank(cs.cs_ch)) break; } /* * When we reach the end of the word before the last * word, we're done. If we changed state, move forward * one to the end of the next word. */ if (cnt == 0) { if (cs.cs_flags == 0 && cs_next(sp, &cs)) return (1); break; } /* Eat whitespace characters. */ if (cs_bblank(sp, &cs)) return (1); if (cs.cs_flags == CS_SOF) goto ret; } else while (cnt--) { state = cs.cs_flags == 0 && inword(cs.cs_ch) ? INWORD : NOTWORD; for (;;) { if (cs_prev(sp, &cs)) return (1); if (cs.cs_flags == CS_SOF) goto ret; if (cs.cs_flags != 0 || isblank(cs.cs_ch)) break; if (state == INWORD) { if (!inword(cs.cs_ch)) break; } else if (inword(cs.cs_ch)) break; } /* See comment above. */ if (cnt == 0) { if (cs.cs_flags == 0 && cs_next(sp, &cs)) return (1); break; } /* Eat whitespace characters. */ if (cs.cs_flags != 0 || isblank(cs.cs_ch)) if (cs_bblank(sp, &cs)) return (1); if (cs.cs_flags == CS_SOF) goto ret; } /* If we didn't move, we must be at SOF. */ ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { v_sof(sp, &vp->m_start); return (1); } /* Set the end of the range for motion commands. */ vp->m_stop.lno = cs.cs_lno; vp->m_stop.cno = cs.cs_cno; /* * All commands move to the end of the range. Motion commands * adjust the starting point to the character before the current * one. * * !!! * The historic vi didn't get this right -- the `yb' command yanked * the right stuff and even updated the cursor value, but the cursor * was not actually updated on the screen. */ vp->m_final = vp->m_stop; if (ISMOTION(vp)) --vp->m_start.cno; return (0); }